********************************************************************************
*** 5estimations.do**********************************************************
*** last change: 2023/06/15 (LH)************************************************
********************************************************************************	
* 5estimations contains the empirical analysis for the transition data.
* (LH June 2023: file slightly edited for data protection reasons ahead of publishing on dataverse)  

/* ------------------------------------------------------------------------ */
/* Contents of this file:	 												*/
/* ------------------------------------------------------------------------ */
 
	* Select spells where aged 18 and older
	* (00) Define additional variables
	* (0a) Investigate black holes and ATZ case (person103==1)
	* (0b) Investigate difference between sample size of potret & sample size of people retiring	
	
	* (1) Define transition variables 
	*		- definitions here equivalent to 4transitions
	*
	* (2) Job-to-job transitions 
	*		- compare wage distributions before and after job-to-job transition
	*		- 2.1. JAERE revision: which sector do people join after leaving lignite?
	*		-      
	* (3) Save dat pre collating
	* 		-> SAVE PRECOLL.DTA	

	* (4) Collate durations from sequences of jobs to estimate transition parameters 
	* 		-> SAVE POSTCOLL:DTA

	* (5) estimation for the whole sample
	/*		a) rho: retirement probability (pretrans 1 and 6) - (previously called mu)
				 - rhodirect: direct retirement from lignite
				 - rhoindirect: indirect retirement from lignite
	
			b) delta: job loss rate (pretrans 7 and 8) (becoming unemployed)
				 - delta: unemployment after any job
				 - deltalig: unemployment after lignite
				 - deltanonlig: unemployment after nonlignite
			
			c) lambda: job arrival rate / job finding rate (pretrans 7 and 8, 11 and 12) (finding a job if unemployed)
				 - lambdalig: finding a job in lignite if unemployed after lignite (although =0 by assumption)
				 - lambdanonlig: finding a job in non-lignite if unemployed  after lignite
				 - lambdazerolig: finding a job in non-lignite if unemployed after non lignite				 
	*/
	*		- 5.1 without taking into account any kind of censoring
	*		- 5.2 taking into account any right censoring due to black hole and end of spell without transition
	
	* (6) To create cells : cut spells crossing macro periods 
	*		- 6.1 Identify change in macro labour market conditions and cut spells crossing macro periods* 
	*		- 6.2 Create variable MACRO for macro scenarios 
	* 		- 6.3 Recalculate age and decades at the end of spell, REDEFINE AGEEND AND AGECAT2
	*		- 6.4 Exposure time for estimation
	
	* (7) In order to choose cell cizes: 
	/*		a) Number of observations in cells according to personal characteristics
				- gender (2 categories) (var FRAU)
				- education (2 categories) (var EDUC2)
				- age (3 categories) (var AGECAT2)
				- macro conditions (2 categories) (var MACRO)
			b) Number of TRANSITIONS in cells according to (same) personal characteristics
				- to retirement 	(pretrans 1 + 6)
				- to unemployment	(pretrans 7)
				- from unemp to job	(pretrans 8)
				- NOT NECESSARY: (pretrans 3 - job-to-job)
	*/	
	* 		-> SAVE POSTCOLL_CELL.DTA
		
	* (8) Estimate parameters by cell
				 
	* (9) Wage offer distribution (by cell) 											
		/*	(9.0) Reload data before collating spells and define cells again : 
		USE PRECOLL.DTA
		KEEP only spells with info on wage at the beggining or at the end of the spell
		CREATE var MACRO_WAGE (different point in time for lignite and non lignite spells)
		CREATE CELL 
		-> SAVE WAGE_SAMPLE.DTA	
		* NB. tihs sample cannot really be used for analysis of wage growth, as only one wage per transition story
			(9.1) wc (wcoal) - characterize distribution (normal and lognormal)  -> wage that would be paid in coal (= wage before transition to unemployment)
			(9.2) w' (woffer) - characterize distribution (normal and lognormal) -> wage offered in another job ( = wage after unemployment)
			(9.3) assume w' is a function of wc and estimate w' for each cell by simple linear regression
			(9.4) expected value of continued employment (back-of-the-envelope calculations, using the values estimated for ÃŽÂ», Ã‚Âµ and ÃŽÂ´)	
	
		* (9.5) Elements for JAERE revision
		*	 (9.5.1) Evolution of coal versus non-coal wages over time
		*	 (9.5.2) Distribution of first age of working in data
		*	 (9.5.3) wage growth profiles of workers in different age ranges - by age ranges
*/

*** TABULA RASA ***
clear all
cd ${main}

*** FILENAME & FILE-SPECIFIC GLOBALS ***
glo filename 	5estimations
glo date 		= string( d(`c(current_date)'), "%tdCYND" )

*** SETTINGS ***
set more off 
set linesize 200        
set rmsg on            
set maxvar 32767, permanently
set excelxlsxlargefile on 

*** START LOG-FILE ***
if ${log_active}==1 {
	mac list date
	capture log close
	log using ${log}/${filename}_${sample}_${date}.log, replace text
	}
*** Note which sample ***
di "NB. this log-file reporting results for sample number: " ${sample}
	
	
*** OPEN DATA FILE ***
* sample global takes values (1), (2), (3), (4) - see data doc for information on different samples
* sample global value (7-9) is for JAERE revision, uses sample sample 2 but  
* using only post-2000 data (sample==(2)) apart from that
if ${sample}<7{
use ${data}\postprep_${sample}.dta, clear
}
if ${sample}>=7{
use ${data}\postprep_2.dta, clear
}

* use same varnames in IAB and test data
 if ${iab}==1{      
              cap rename wz08_kons_num wz08
			  * g pid=persnr
              }

* The command "put excel close" does not work on LH version of Stata
if $iab==1{
	local putexcelclose="putexcel close"
else 
	local putexcelclose=""
	}

/* ------------------------------------------------------------------------ */
 * 	NEW : KEEP ONLY WHEN 18 Years old or older
/* ------------------------------------------------------------------------ */
/***** Recalculate age at the end of spell ******/
tab ageend, m		
gen jahrend = year(endepi)
label var jahrend "year at end of spell"
replace ageend = jahrend - year(geb_dat)
label var ageend "age at end of spell"
di "tab age at end of last spell of sampled workers"
tab ageend, m

keep if ageend >= 18 


/* ------------------------------------------------------------------------ */
 * (00) Define additional variables for characteristics
/* ------------------------------------------------------------------------ */	
	/* 	* mining_area_bin	binary indicator for mining_area (east/west)
		* educ2				education categories								
		* agecat2			broad age categories			*/

	* Binary mining area indicator (east / west)	
			gen mining_area_bin = .

			replace mining_area_bin = 1 if mining_area == 1 //Lausitzer Revier
			replace mining_area_bin = 1 if mining_area == 2 //Mitteldeutsches Revier
			replace mining_area_bin = 1 if ao_kreis == 03153 // LK Goslar				// 75,067
			replace mining_area_bin = 1 if ao_kreis == 13073 // LK Vorpommern-RÃƒÂ¼gen		// 20,402
			replace mining_area_bin = 1 if ao_kreis == 14521 // LK Erzgebirgskreis		// 59,263
			replace mining_area_bin = 1 if ao_kreis == 11000 // Stadt Berlin			// 158,323
			
			replace mining_area_bin = 2 if mining_area == 3 //Helmstedter Revier
			replace mining_area_bin = 2 if mining_area == 4 //Rheinisches Revier
			replace mining_area_bin = 2 if ao_kreis == 03241 // Region Hannover			// 372,694
			replace mining_area_bin = 2 if ao_kreis == 05570 // LK Warendorf			// 189,594
			replace mining_area_bin = 2 if ao_kreis == 06440 // LK Wetteraukreis		// 49,629
			replace mining_area_bin = 2 if ao_kreis == 06611 // Stadt Kassel			// 106,379
			replace mining_area_bin = 2 if ao_kreis == 06634 // LK Schwalm-Eder-Kreis	// 77,271
			replace mining_area_bin = 2 if ao_kreis == 06636 // LK Werra-MeiÃƒÅ¸ner-Kreis	// 41,551
			replace mining_area_bin = 2 if ao_kreis == 09162 // Stadt MÃƒÂ¼nchen			// 155,443
			replace mining_area_bin = 2 if ao_kreis == 09376 // LK Schwandorf			// 68,832
			replace mining_area_bin = 2 if ao_kreis == 09672 // LK Bad Kissingen		// 12,181
			replace mining_area_bin = 2 if ao_kreis == 10041 // Regionalverband SaarbrÃƒÂ¼cken	// 758,671
			replace mining_area_bin = 2 if ao_kreis == 10044 // LK Saarlouis			// 245,729

			label define areas_bin 1 "East" 2 "West"
			label variable mining_area_bin "Mining Area East(1) / West(2)"
			
			tab mining_area_bin, m
				
	* Education: use variable bild created in 1prepare and aggregate into two categories
			tab bild, m
			cap drop educ2
			gen educ2=. 
			replace educ2=0 if bild == 1 // neither vocational training or degree from universtiy
			replace educ2=1 if bild == 2 | bild == 3 // vocational training (Ausbildung) or degree from an university or university of applied science (Uni or FH)
			label define educLAB 0 "keine abg. Ausbild." 1 "abg. Ausbildung" 
			label values educ2 educLAB
			label variable educ2 "education 2 categories"

	* Age quantiles: Define age categories manually 
			cap drop agecat2	
			gen agecat2 = 1 if ageend >= 18 & ageend <= 30
			replace agecat2 = 2 if ageend > 30 & ageend <= 49
			replace agecat2 = 3 if ageend > 49 & ageend !=. 
			label var agecat2 "age at end of spell by category"
			label define agecat2LAB 1 "age18-30" 2 "age31-49" 3 "age50+" 
			label values agecat2 agecat2LAB
	* Decade of end of spell (decade 1 are actually more than two decades)
		cap drop decades
		g decades=.
		replace decades=1 if endepi>=mdy(01,01,1970) & endepi<mdy(01,01,1992)
		* note post-92 (incl. East Germany)
		replace decades=2 if endepi>=mdy(01,01,1992) & endepi<mdy(01,01,2000)
		* JAERE: if we wish to exclude pre-2000 data, include only decades 3&4
		replace decades=3 if endepi>=mdy(01,01,2000) & endepi<mdy(01,01,2010)
		replace decades=4 if endepi>=mdy(01,01,2010) & endepi!=.
		
		label var decades "decade in which spell ended"
		label define decadesLAB 1 "1970s-1980s" 2 "1990s" 3 "2000s" 4 "2010s"
		label values decades decadesLAB
		tab decade, m



/* ------------------------------------------------------------------------ */
 * 	(0.a) Investigate black holes and ATZ case (person103==1)
/* ------------------------------------------------------------------------ */
	
	*Black holes
	* How black holes (status==10) are treated
	tab status
	tab statsimple status, missing
	
	*Is it possible that first spell is a black hole
	tab status if spell==1

	*Is it possible that last spell is a black hole
	cap drop last_spell
	by pid (begepi), sort: gen byte last_spell = (_n == _N)
	tab status if last_spell==1
	
	* What is before?
	tab status if status[_n+1]==10 & pid==pid[_n+1]
	
	* What is after?
	tab status if status[_n-1]==10 & pid==pid[_n-1]

	*How many persons are concerned by black holes
	cap drop nvals 
	gen nobs_blackhole=0
	by pid status, sort: replace nobs_blackhole = _n == 1 if status==10
	tab nobs_blackhole, missing	
	
	*ATZ cases
	count if endepi<begepi & person103!=1
	count if endepi<begepi & person103==1
	tab statsimple Dlast103
	tab statsimple Dfirst103
	tab statsimple if mid103>begepi & mid103<endepi
	
/* ------------------------------------------------------------------------ */	
 * (0.b) Investigate difference between sample size of potret & sample size of people retiring	
/* ------------------------------------------------------------------------ */
	
	di "over 49"
		tab ageend if dpotret==1
		tab agepotret if dpotret==1
		count if agepotret==ageend
		count if agepotret<ageend
		count if agepotret>ageend
		tab agepotret ageend if dpotret==1 & agepotret!=ageend & agepotret>49
	di "grund 130"
		tab grund if dpotret==1
		tab grund if dpotret==1 & agepotret>49
	di "lignite"
		tab thisspelllignite if dpotret==1 
		tab thisspelllignite if dpotret==1 & agepotret>49
	di "statsimple=1"		
		tab statsimple if dpotret==1
		tab statsimple if dpotret==1 & agepotret>49

/* ------------------------------------------------------------------------ */
 * 	(1) Define transition var: mutually exclusive var on post-lignite destinations
/* ------------------------------------------------------------------------ */
	
	/* We use statsimple, which is 
			- 0 for unemployment, marginal employment or ALMP
			- 1 for normal employment
			- 2 for vocational employment
	note we have collated sequential */
		
	/* The transition variable should capture the following types of transitions:
			1) pretrans == 1	Lignite -> retirement (with no marginal employment)
			2) pretrans == 2	Lignite -> vocational
			3) pretrans == 3	Lignite -> other normalemp
			4) pretrans == 6	Lignite -> unemp/margemp/ALMP - retirement
			5) a) pretrans == 7	Lignite -> unemp/margemp/ALMP - other normalemp
			   b) pretrans == 8 Lignite -> unemp - normalemp (in lignite)
			6) pretrans == 10 	Lignite -> black hole
			7) pretrans == 11 	NON-Lignite -> unemp/margemp/ALMP - normal emp not in lignite
			8) pretrans == 12 	NON-Lignite -> unemp/margemp/ALMP - other */

		cap drop pretrans
		gen pretrans = 0

		* (Pretrans=1) Retirement (difficult: no direct indication in BeH - use last observed spell that not minijob (potret) - defined in 1prepare
		
		* 1 a) direct retirement out of unemployment (EXTREMELY FEW cases - grund 2022 & 2055 - case (6) more likely. for completeness here.
			bysort pid (begepi): replace pretrans = 1  if thisspelllignite == 1 & (grund == 509 | grund == 1154) & (agepotret>49 & agepotret!=.) & (dpotret==1)
						
		* 1 b) retirement from employment: end of job (grund=130),  above a certain age (over49) & no later spell observed in data (potret - defined above) => retirement 
			bysort pid (begepi): replace pretrans = 1  if thisspelllignite == 1 & statsimple==1 & (grund==130) & (agepotret>49 & agepotret!=.) & (dpotret==1) 
	
		* (Pretrans=2) Lignite - vocational
			bysort pid (begepi): replace pretrans = 2 if thisspelllignite == 1 & statsimple==1 & thisspelllignite[_n+1] == 0 & statsimple[_n+1]==2  
			
		* (Pretrans=3) Lignite - normalemp
			bysort pid (begepi): replace pretrans = 3 if thisspelllignite == 1 & statsimple==1 & thisspelllignite[_n+1] == 0 & statsimple[_n+1]==1  
						
		* (Pretrans=6) Lignite - unemp/margemp/ALMP - retirement
			bysort pid (begepi): replace pretrans = 6 if thisspelllignite == 1 & statsimple==1 & statsimple[_n+1]==0 & (agepotret>49 & agepotret!=.) & (dpotret[_n+1]==1)  
		
		* (pretrans=7) Lignite - unemp/margemp/ALMP - normal employment (not in lignite)
			bysort pid (begepi): replace pretrans = 7 if thisspelllignite == 1 & statsimple==1 & statsimple[_n+1]==0 & thisspelllignite[_n+2] == 0 & (statsimple[_n+2]==1)
					
		* (pretrans=8) Lignite - unemp/margemp/ALMP - normalemp (in lignite)
			bysort pid (begepi): replace pretrans = 8 if thisspelllignite == 1 & statsimple==1 & statsimple[_n+1]==0 & thisspelllignite[_n+2] == 1  & (statsimple[_n+2]==1)
					
		* (pretrans=10) Lignite - black hole
			bysort pid (begepi): replace pretrans = 10 if thisspelllignite == 1 & thisspelllignite[_n+1] != 1 & status[_n+1] == 10
				
		* (pretrans=11) NON-Lignite normal employment- unemp/margemp/ALMP - normal employment (not in lignite)
			bysort pid (begepi): replace pretrans = 11 if thisspelllignite == 0 & statsimple==1 & statsimple[_n+1]==0 & thisspelllignite[_n+2] == 0 & (statsimple[_n+2]==1)	
			
		* (pretrans=12) NON-Lignite normal employment- unemp/margemp/ALMP - employment in lignite
			bysort pid (begepi): replace pretrans = 12 if thisspelllignite == 0 & statsimple==1 & statsimple[_n+1]==0 & thisspelllignite[_n+2] == 1 & (statsimple[_n+2]==1)
					
		     	* Remark: Here we only consider individuals that have another employment spell after their unemployment spell. 
				*		  Individuals with series: ligemployment - nonligemployment - unemployment END will not be included in estimation
		
		label define pretransLAB	0 "Not pre (observed) trans'n out of lignite" ///
									1 "pre retirement (without minijob)" ///
									2 "pre trans'n 2 vocational training" ///
									3 "pre trans'n 2 other normal employment" ///
									6 "pre trans'n to unem/ALMP/marg,then retire" ///
									7 "pre trans'n 2 unemp/ALMP/marg,then non-lig emp" ///
									8 "pre trans'n 2 unemp/ALMP/marg,then lig emp" ///
									10 "pre trans'n 2 black hole" ///
									11 "pre transition out of NON-Lig into un/ALMP/marg ending in emp non lignite" ///
									12 "pre transition out of NON-Lig into un/ALMP/marg ending in emp lignite"
							 
		label values pretrans pretransLAB
		
		* Define the period after the transition
		sort pid begepi
		gen posttrans = 0
		bysort pid (begepi): replace posttrans = pretrans[_n-1] if pretrans[_n-1]!=.
		label define posttransLAB	0 "Not post trans'n out of lignite" ///
									1 "in retirement (no minijob) post-lignite" ///
									2 " post-lignite vocational training" ///
									3 "post-lignite normal employment" ///
									6 "post-lignite unemp/ALMP/marg ending in retirement" ///
									7 "post-lignite unemp/ALMP/marg ending in another emp"  ///
									8 "post-lignite unemp/ALMP/marg ending in lignite emp" ///
									10 "post-lignite black hole" ///
									11 "transition out of NON-Lig into un/ALMP/marg ending in emp non lignite" ///
									12 "transition out of NON-Lig into un/ALMP/marg ending in emp lignite"
		label values posttrans posttransLAB
		
		* compare occurence of different transitions (incl. non-lignite spells)
		tab pretrans, m
		tab posttrans, m
		
		tab pretrans statsimple, m
		tab posttrans statsimple, m
		
		tab pretrans status, m
		tab posttrans status, m
		
/* ------------------------------------------------------------------------ */
 *  (2) Post-lignite destinations
/* ------------------------------------------------------------------------ */	
*
*   (2.0) Overview of post-lignite destinations
*
		************************************************************************
		* Transition destinations after lignite
		************************************************************************
		* from part (1b) of 4transitions

		tab pretrans if pretrans>0 & thisspelllignite==1 & pretrans!=.
		
		* post-lignite destinations by decade *
		disp("Transitions out of lignite in the 1970s and 1980s")
		tab pretrans if pretrans>0 & thisspelllignite==1 & pretrans!=. & decades==1
		disp("Transitions out of lignite in the 1990s")		
		tab pretrans if pretrans>0 & thisspelllignite==1 & pretrans!=. & decades==2
		disp("Transitions out of lignite in the 2000s")
		tab pretrans if pretrans>0 & thisspelllignite==1 & pretrans!=. & decades==3
		disp("Transitions out of lignite in the 2010s")
		tab pretrans if pretrans>0 & thisspelllignite==1 & pretrans!=. & decades==4
	
		* Check: this should only give zeros (no transition) and ones (for retirement we have no post-trans obs)
		tab pretrans if posttrans[_n+1]!=pretrans 

		* Pretrans - ageendcat (age at the end of spell)
		tab pretrans ageendcat if pretrans>0 & thisspelllignite == 1, column row 
		*

		* post-lignite destinations by decade & age *
		*
		disp("Transitions out of lignite in the 1970s and 1980s")
		tab pretrans ageendcat if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==1, column row
		disp("Transitions out of lignite in the 1990s")		
		tab pretrans  ageendcat if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==2, column row
		disp("Transitions out of lignite in the 2000s")
		tab pretrans  ageendcat if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==3, column row
		disp("Transitions out of lignite in the 2010s")
		tab pretrans  ageendcat if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==4, column row			
	
		*  post-lignite destinations - by decade & gender *
		*
		tab pretrans frau if pretrans>0, column row
		* post-lignite destinations by decade & gender*
		disp("Transitions out of lignite in the 1970s and 1980s")
		tab pretrans frau if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==1, column row
		disp("Transitions out of lignite in the 1990s")		
		tab pretrans  frau if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==2, column row
		disp("Transitions out of lignite in the 2000s")
		tab pretrans  frau if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==3, column row
		disp("Transitions out of lignite in the 2010s")
		tab pretrans  frau if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==4	, column row		
		
		*  post-lignite destinations - by decade & education *
		*
		tab pretrans bild if pretrans>0, column row
		* post-lignite destinations by decade & education*
		disp("Transitions out of lignite in the 1970s and 1980s")
		tab pretrans bild if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==1, column row
		disp("Transitions out of lignite in the 1990s")		
		tab pretrans  bild if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==2, column row
		disp("Transitions out of lignite in the 2000s")
		tab pretrans  bild if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==3, column row
		disp("Transitions out of lignite in the 2010s")
		tab pretrans  bild if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==4, column row			

		*  post-lignite destinations - by decade & experience *
		*
		tab pretrans exp_cat if pretrans>0, column row
		disp("Transitions out of lignite in the 1970s and 1980s")
		tab pretrans exp_cat if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==1, column row
		disp("Transitions out of lignite in the 1990s")		
		tab pretrans  exp_cat if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==2, column row
		disp("Transitions out of lignite in the 2000s")
		tab pretrans  exp_cat if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==3, column row
		disp("Transitions out of lignite in the 2010s")
		tab pretrans  exp_cat if pretrans>0 & pretrans!=. & thisspelllignite == 1 & decades==4, column row		
	
		**********************

		
	************************************************
	*** (2.1) Direct job-to-job transitions    *****
	************************************************
	* (previously section 2a in 4transitions)

	* only relatively few people changed sector but remained in same firm (these may be dodgy)
	count 		if pretrans[_n-1]==3 & pid==pid[_n-1] & betnr==betnr[_n-1] 

	* (2.1.a) Distribution of post-transition industries
	* 
	*** Which industries / sectors do people join after direct job-to-job moves?
	*** NB. relevant for JAERE revision

	tab wirtschaftszweige if posttrans ==3, sort 
	bys decades:	tab wirtschaftszweige if posttrans ==3, sort
	di "Destination industries of direct J2J lignite-leavers all areas by decades"
	forvalues t = 1/4 {
		cap tab wirtschaftszweige if posttrans == 3 & decades == `t' 
		cap estpost tab wirtschaftszweige if posttrans == 3 & decades == `t', sort
		cap estimates store y`t'
	}
	esttab y* using results/${samplefolder}/5inddec.csv, label plain replace
	estimates clear

	*** Differences across mining areas ***
	di "Destination industries of lignite-leavers across mining areas by decades"
	forvalues i = 1/5 {
		forvalues t = 1/4 {
			cap tab wirtschaftszweige if posttrans == 3 & decades == `t' & mining_area == `i'
			cap estpost tab wirtschaftszweige if posttrans == 3 & decades == `t' & mining_area == `i'
			cap estimates store y`t'
		}
		esttab y* using results/${samplefolder}/5inddecarea`i'.csv, label plain replace
		estimates clear
	}
	
	*** Differences across other characteristics (2010-2017) ***
	bys frau:		tab wirtschaftszweige if posttrans ==3 & decades==4 
	bys exp_cat:	tab wirtschaftszweige if posttrans ==3 & decades==4 
	bys ageendcat:	tab wirtschaftszweige if posttrans ==3 & decades==4 
	bys bild:		tab wirtschaftszweige if posttrans ==3 & decades==4

	*** Differences across other characteristics (post-2000) ***
	bys frau:		tab wirtschaftszweige if posttrans ==3 & (decades==3 | decades==4)
	bys exp_cat:	tab wirtschaftszweige if posttrans ==3 & (decades==3 | decades==4)
	bys ageendcat:	tab wirtschaftszweige if posttrans ==3 & (decades==3 | decades==4)
	bys bild:		tab wirtschaftszweige if posttrans ==3 & (decades==3 | decades==4)

	************************************************************
	*** (2.2) including indirect job-to-job transitions    *****
	************************************************************
	* including indirect job-to-job transitions out of lignite (7 & 8) as well as direct ones (3)

	g jposttrans=.
	bysort pid (begepi): replace jposttrans = 1 if (pretrans[_n-2]==7)	
	
	di "Destination indu of direct & indirect J2J out of lignite, all areas, by decades"
				tab wirtschaftszweige if (posttrans ==3 | jposttrans==1), sort 
	* for JAERE revision: focus on the most recent decade
	bys decades:	tab wirtschaftszweige if (posttrans ==3 | jposttrans==1), sort

	forvalues t = 1/4 {
		cap tab wirtschaftszweige if (posttrans == 3 | jposttrans==1) & decades==`t', sort
		cap estpost tab wirtschaftszweige if (posttrans == 3 | jposttrans==1) & decades == `t'
		cap estimates store y`t'
	}
	esttab y* using results/${samplefolder}/5inddec2.csv, label plain replace
	estimates clear

	****************************************************************************
	*** (2.3) geographic distribution of destinations after transitions    *****
	****************************************************************************
		
	* geographic locations prior to transitions
	* JEAERE 
	* bula 3 - Niedersachen
	* bula 5 - NRW
	* bula 12 - Brandenburg
	* bula 14 - Sachsen 
	
	gen pre_wo_bula = wo_bula if (pretrans==3 | pretrans==7) 
	gen pre_wo_kreis = wo_kreis if (pretrans==3 | pretrans==7) 

	gen pre_ao_bula = ao_bula if (pretrans==3 | pretrans==7)  
	gen pre_ao_kreis = ao_kreis if (pretrans==3 | pretrans==7) 
		
	* copy later geolocations to prior observations to allow for matrix
	* create "future geographic locations"
	
	bys pid (begepi): gen fut_wo_bula = wo_bula[_n+1] if pretrans==3
	bys pid (begepi): gen fut_ao_bula = ao_bula[_n+1] if pretrans==3 
	bys pid (begepi): gen fut_wo_kreis = wo_kreis[_n+1] if pretrans==3 
	bys pid (begepi): gen fut_ao_kreis = ao_kreis[_n+1] if pretrans==3 

	bys pid (begepi): replace fut_wo_bula = wo_bula[_n+2] if pretrans==7
	bys pid (begepi): replace fut_ao_bula = ao_bula[_n+2] if pretrans==7
	bys pid (begepi): replace fut_wo_kreis = wo_kreis[_n+2] if pretrans==7 
	bys pid (begepi): replace fut_ao_kreis = ao_kreis[_n+2] if pretrans==7 

	* Matrices of pre and post transition geographic location
	tab pre_ao_bula fut_ao_bula
	tab pre_wo_bula fut_wo_bula

	* where do people move to - by mining area
	tab fut_ao_bula if (pretrans==3 | pretrans==7)							
	bys mining_area: tab fut_ao_bula if (pretrans==3 | pretrans==7)			
	bys mining_area: tab fut_ao_kreis if (pretrans==3 | pretrans==7), sort	
																			
	tab fut_wo_bula if (pretrans==3 | pretrans==7)							
	bys mining_area: tab fut_wo_bula if (pretrans==3 | pretrans==7)			
	bys mining_area: tab fut_wo_kreis if (pretrans==3 | pretrans==7), sort	
					
	* what fracction of people who move to new jobs remain in same kreis/bula?
	g same_wo_kreis =. 
	replace same_wo_kreis=0 if (pre_wo_kreis!=. & fut_wo_kreis!=.)
	replace same_wo_kreis=1 if (pre_wo_kreis == fut_wo_kreis) & (fut_wo_kreis!=.)
	tab same_wo_kreis
	
	g same_wo_bula =. 
	replace same_wo_bula=0 if (pre_wo_bula!=. & fut_wo_bula!=.)
	replace same_wo_bula=1 if (pre_wo_bula == fut_wo_bula) & (fut_wo_bula!=.)
	tab same_wo_bula

	g same_ao_kreis =.
	replace same_ao_kreis = 0 if pre_ao_kreis !=.
	replace same_ao_kreis = 1 if (pre_ao_kreis == fut_ao_kreis) & (pre_ao_kreis !=.)
	tab same_ao_kreis
		
	g same_ao_bula = .
	replace same_ao_bula = 0 if pre_ao_bula != .
	replace same_ao_bula = 1 if (pre_ao_bula == fut_ao_bula) & (pre_ao_bula !=.)
	tab same_ao_bula
		
	**********************************************************************************
	*** (2.4) Distribution of occupations in destination jobs after transitions  *****
	**********************************************************************************

	* occupations in lignite
	tab beruf12 if thisspelllignite==1

	* occupations after direct J2J move *
	tab beruf12 if posttrans==3, sort
	bys decades:	tab beruf12 if (posttrans==3), sort

	* occupations after indirect J2J move *
	tab beruf12 if (posttrans==3 | jposttrans==1), sort
	bys decades:	tab beruf12 if (posttrans ==3 | jposttrans==1), sort
	
	* occupational transition matrix
	tab beruf12 if (pretrans==3 | jposttrans==1), m
	tab beruf12 if pretrans==13, m
	
	g beruf12_pre=.
	replace beruf12_pre=beruf12 if (pretrans==3 | jposttrans==1)
	
	g beruf12_post=.
	replace beruf12_post=beruf12 if (posttrans==3 | jposttrans==1)
	
/* ------------------------------------------------------------------------ */
 * 	(3) Save data before collating spells -> PRECOLL.DTA
/* ------------------------------------------------------------------------ */	
sav ${data}\precoll.dta, replace

/* ------------------------------------------------------------------------ */
 * 	BASIC DESCRIPTIVE STATISTICS ON CHARACTERISTICS BEFORE COLLATING SPELLS
/* ------------------------------------------------------------------------ */
* NB. these are descriptive stats on spells, not on individuals
tab ageend, m
tab agecat2, m		
tab frau, m
tab bild, m
tab educ2, m
tab mining_area, m	
tab beruf12, m
*We want to know if we have black hole spells in the sample (statsimple=.)
tab statsimple,m		

/* ------------------------------------------------------------------------ */
 *  (4) Collate duration to estimate transition parameters 
/* ------------------------------------------------------------------------ */			
	/*	Later calculated
		- rho (ex mu: retirement)
		- delta_lig
		- delta_nonlig
		- lambda_nonlig (lambda_lig we set to zero...)
	
		To estimate [rho, delta_lig, delta_nonlig] we need durations of employment before transitions:
		Collate job spells by status for full duration of (employment) status prior to transition. 
			-rho (ex-mu): retirement probability (pretrans 1 and 6)
			     rhodirect (pretrans = 1) /  rhoindirect (pretrans = 6)
			- delta_lig (pretrans = 7 & pretrans = 8): proba of job loss out of lignite
			- delta_nonlig (pretrans = 11 & pretrans = 12): proba of job loss out of non-lignite

		To estimate [lambda_nonlig, lambda_nonlig] we need durations of unemployment after the transition.
		Collate spells by status for full duration of unemployment status after transition. 
			- lambda_nonlig (posttrans=7): finding a job in non-lignite if unemployed  				
			- lambda_lig (posttrans=8): finding a job in lignite if unemployed (although =0 by assumption)
	*/	
	
	/* ------------------------------------------------------------------------ */
	* for rho and delta collate durations in * same status * employment before transition 
	/* ------------------------------------------------------------------------ */

	* To estimate the duration pre-retirement and pre-unemployment, we are interested in 
	* the whole period of an individual in which the status remains identical. 
	* Statsimple:	0 - unemp, ALMP or margemp
	*				1 - normalemp
	*				2 - vocational training
	
			* Gen a variable that counts (backwards) the number of spells with the same status ending at the transition spell
			sort pid begepi
			cap drop consecutive
			*Start counting at the pre-transition spell.
			bys pid (begepi): gen consecutive = 1 if inlist(pretrans,1,6,7,8,10,11,12) //remove 3 (j2j-transitions) because they otherwise break the consecutive series
			
			* Count as long (backwards) as status stays the same
			* this simply repeats the operation of adding consecutive spells until no more replacements are made.
			local more 1
			while `more'{
			clonevar consecutive2=consecutive
			bys pid (begepi): replace consecutive = consecutive[_n+1] + 1 if consecutive[_n+1]!=. & consecutive==. & statsimple==statsimple[_n+1] 
			count if consecutive2!=consecutive
			local more = r(N)
			drop consecutive2
			}
	
			* Use the maximum number of spells with the same status as the end for the loop below
			tab consecutive
			sum consecutive
			local end = r(max)
			disp(`end')

			forvalues i = 2/`end' {
				* Manipulate begepi
				// Here we extend the first spell within each stack of consecutive spells
				bys pid (begepi): replace begepi = begepi[_n-1] if consecutive[_n-1] == `i' & consecutive == 1 
				* Drop the *first of consecutive spells*
				bys pid (begepi): drop if consecutive == `i' & consecutive[_n+1] == 1
				}

			* REMARK: Only the information of the *last spell* is kept! 
			* We do not want to use this for descriptive statistics on income etc. 
			* (thus will go back to dataset prior to collating status (dataset called precoll) below)

	/* ------------------------------------------------------------- */
	* for lambda Collate durations in unemp after transition 
	/* ------------------------------------------------------------- */
			
			sort pid begepi
			cap drop consecutive
			*Start counting at relevant posttransition spells.
			bys pid (begepi): gen consecutive = 1 if inlist(posttrans,7,8,11,12) 

			* Count as long (backwards) as status stays the same
			* this simply repeats the operation of adding consecutive spells until no more replacements are made.
			local more 1
			while `more'{
			clonevar consecutive2=consecutive
			bys pid (begepi): replace consecutive = consecutive[_n-1] + 1 if consecutive[_n-1]!=. & consecutive==. & status==status[_n-1] 
			count if consecutive2!=consecutive
			local more = r(N)
			drop consecutive2
			}	
								
			* Generate duration of status
			cap drop dur
			gen dur = (endepi - begepi) + 1
			label var dur "duration in labour mkt status"
	
/* ------------------------------------------------------------------------ */
 * 	SAVE AFTER COLLATING SPELLS -> POSTCOLL.DTA
/* ------------------------------------------------------------------------ */				
save ${data}\postcoll.dta, replace

/* ------------------------------------------------------------------------ */
 * 	BASIC DESCRIPTIVE STATISTICS ON CHARACTERISTICS AFTER COLLATING SPELLS
/* ------------------------------------------------------------------------ */
tab ageend, m
tab agecat2, m		
tab frau, m
tab educ2, m
tab mining_area, m
*We want to know if we have black hole spells in the sample (statsimple=.)
tab statsimple,m	
tab beruf12, m
		*************************************************************** 
		*****  Number of distinct persons working in coal in 2017 ****		
		*************************************************************** 
		count if begepi<mdy(6,1,2017) & endepi>mdy(6,1,2017) & thisspelllignite==1 & statsimple==1
		putexcel set results/${samplefolder}/5_sample${sample}_wholesample_stats, sheet("nb_distinctpersons_coal2017") modify
		putexcel B1=("nb of distinct persons working in coal2017") A2= ("whole sample") B2=(r(N))
		`putexcelclose'	

		*************************************************************** 
		********  Age distribution in lignite in 2017 ***************			
		*************************************************************** 
		* Population of lignite workers in latest period *
		cap drop presentcoal2017 agecoal2017
		gen presentcoal2017=0
		replace presentcoal2017=1 if thisspelllignite==1 & begepi<mdy(6,1,2017) & endepi>mdy(6,1,2017) 
		gen agecoal2017=2017-year(geb_dat) if presentcoal2017==1 
		tab agecoal2017, matrow(matname)
		capture noisily estpost tab agecoal2017
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_stats, sheet("distrib_age_coal2017") modify
			putexcel B1=("age in coal in 2017") A2= matrix(matname) B2=matrix(e(b)')
			`putexcelclose'
		}
		
		*************************************************************** 
		********(4.0.a ) Diagnostics on spell durations ***************			
		*************************************************************** 

	* JAERE - R2.MC7 - Distribution of length of spells in lignite
	* in 7biocoal
				
		* (a) transition from employment to retirement or unemployment
			* Distribution of duration of last pre-transition status
			
			sum dur if inlist(pretrans,1,6,7,8,11,12), detail
		
			* Satus before transition 
			tab pretrans statsimple

			* Duration in status before transition
			foreach i in 0 1 2 3 6 7 8 10 11 12 {
				local j = `i'+2
				di "Duration in status before pretrans `i'"	
				capture noisily estpost sum dur if pretrans == `i', d 
				if _rc!=2000{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesample_stats, sheet("duration_pretrans") modify
					putexcel A`j'=("pretrans_`i'") B1=("nb spells") B`j'=matrix(e(count)) C1=("mean") C`j'=matrix(e(mean))  
					`putexcelclose'
				}
			}
			
		* (b) transition from unemployment to employment
			* Satus before transition 
			tab posttrans statsimple
			
			* Duration in status before transition
			foreach i in 0 1 2 3 6 7 8 10 11 12 {
				local j = `i'+2
				di "Duration in status before posttrans `i'"	
				capture noisily estpost sum dur if posttrans == `i', d 
				if _rc!=2000{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesample_stats, sheet("duration_posttrans") modify
					putexcel A`j'=("posttrans_`i'") B1=("nb spells") B`j'=matrix(e(count)) C1=("mean") C`j'=matrix(e(mean))  
					`putexcelclose'
				}	
			}

			* detailed by sex/year
			tabulate frau if (posttrans==7 | posttrans==8), summarize(dur) 
			tabulate jahrend if (posttrans==7 | posttrans==8), summarize(dur) 
			tabulate jahrend frau if (posttrans==7 | posttrans==8), summarize(dur) 
			tabulate frau if (posttrans==11 | posttrans==12), summarize(dur) 
			tabulate jahrend if (posttrans==11 | posttrans==12), summarize(dur) 
			tabulate jahrend frau if (posttrans==11 | posttrans==12), summarize(dur) 		

		*******************************************************************
		********(4.0.b ) Diagnostics on (very) short spells ***************			
		*******************************************************************

			tab status if dur<7 & dur!=.
			tab status if dur<30 & dur!=.

			su dur if dur<30 & dur!=. & (status == 4 | status == 1 | status == 0), det
			su dur if dur<30 & dur!=. & (status == 3 | status == 2), det
			
			* pre & post short spells
			g short=1 if dur<7 & dur!=.
			
			* what is BEFORE short spells?		
			g preshort=status[_n-1] if pid==pid[_n-1] & short==1
			tab preshort
			* what is before short unemp spells?
			tab preshort if status==0 & short==1
			* what is before short emp spells?
			tab preshort if (status == 3 | status == 2) & short==1

			* what is AFTER short spells?			
			g postshort=status[_n+1] if pid==pid[_n+1] & short==1
			tab postshort
			* what is after short unemp spells?
			tab postshort if status==0 & short==1
			* what is after short emp spells?
			tab postshort if (status == 3 | status == 2) & short==1


/* ------------------------------------------------------------------------ */
 *  (5) First estimation on the whole sample
/* ------------------------------------------------------------------------ */	
		 * 5.1 We calculate estimator for the whole sample before cutting spells that cross macro scenario (make sense only for estimation by cell)	
		 * 5.2 Same	but taking into account right censoring due to black_hole and end of spell 
 
	/* ------------------------------------------------------------------------ */
	 *  (5.1) Without taking into account any type of censoring
	/* ------------------------------------------------------------------------ */
			
	* We calculate estimator for the whole sample before cutting spells that cross macro scenario (make sense only for estimation by cell)
	* Probability of transition = (1/duration of status)		( -> Poisson rate)

	/* Different possibilities of scenarios in the loop to test excluding outliers
		- 0 : use whole sample
		- 0.01 : exclude bottom and top outliers of the duration distribution at percentile 0.01
		- 1 : exclude bottom and top outliers of the duration distribution at percentile 1
		- 2 : exclude bottom and top outliers of the duration distribution at percentile 2
		- 3 : excluding 1 day duration spells
		- 4 : excluding 1 to 6 days duration spells
		- 5 : excluding 1% top duration spells
		- 6 : excluding 2% top duration spells
		- 7 : excluding all observations of people who have at least 1 day spell
	*/
	
	foreach x in 0 /*0.01 0.1 1 2 3 4 5 6 7*/ {
	
	use ${data}\postcoll.dta, clear
	
	cap drop keep
	gen keep=1

	* Whole sample
	if  `x'==0 {
	di "Analysis on the whole sample"
	}
		
	* Removing spells at top and bottom percentile of distribution	
	if `x'>0 & `x'<3 {
	di "Analysis excluding bottom and top outliers of the duration distribution at the p`x'"	
	
	local top= 100-`x'
	_pctile dur, p(`x' `top')
	return list
	local threshold_bottom = `r(r1)'	
	local threshold_top = `r(r2)'	
	replace keep=0 if dur<`threshold_bottom' | dur>`threshold_top'
	}
	
	* Removing 1 day duration spells
	if `x'==3 {
	di "Analysis excluding 1 day duration spells"
	replace keep=0 if dur==1
	}	
	
	* Removing  1 to 6 day duration spells
	if `x'==4 {
	di "Analysis excluding 1 to 6 day duration spells"
	replace keep=0 if dur<7
	}	
	
	* Removing 1% top distribution spells
	if `x'==5 {
	di "Analysis excluding 1% top distribution spells"
	_pctile dur, p(99)
	return list
	local threshold_top = `r(r1)'	
	replace keep=0 if dur>`threshold_top'
	}
	
	* Removing 2% top distribution spells
	if `x'==6 {
	di "Analysis excluding 2% top distribution spells"
	_pctile dur, p(98)
	return list
	local threshold_top = `r(r1)'	
	replace keep=0 if dur>`threshold_top'
	}
	
	* Removing all observations of people who have at least 1 day spell
	if `x'==7 {
	di "Analysis excluding all observations of people who have at least 1 day spell"
	cap drop shortdur
	gen shortdur=0
	bysort persnr (begepi): replace shortdur=1 if dur==1
	tab shortdur
	*copy values to all observations
	cap drop indivshort
	bysort persnr (begepi): egen indivshort=max(shortdur)
	tab indivshort
	replace keep=0 if indivshort==1
	}
	
	tab keep
	sum dur if keep==0, detail
	tab pretrans keep
	tab posttrans keep	
	drop if keep==0	
	sum dur, detail	
	
		*  (5.1.a)  Retirement probability rho
			
		* 	We estimate three different rhos (ex-mu's:)
		*  	1) do not distinguish between those two types of retirement
		*	2) rho (ex-mu) direct retirement probability  (pretrans == 1)	-> lignite -> retirement
		*	3) rho (ex-mu) indirect retirement probability (pretrans == 6) 	-> lignite -> unemp/marg emp/ALMP -> retirement
		
		di "Lignite leavers' average job duration retirement = rho"
		capture noisily estpost sum dur if (pretrans==1)
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("rhodirect") modify
			putexcel A2=("Lignite leavers' average job duration before direct retirement") C1=("nb spells") D1=("duration") C2=matrix(e(count)) D2=matrix(e(mean)) 
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("rhodirect") modify
				putexcel B1=("rhodirect") B2=formula(=1/D2)
				`putexcelclose'
			}
		}	
		capture noisily estpost sum dur if (pretrans==6)
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("rhoindirect") modify
			putexcel A2=("Lignite leavers' average job duration before indirect retirement") C1=("nb spells") D1=("duration") C2=matrix(e(count)) D2=matrix(e(mean))
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("rhoindirect") modify
				putexcel B1=("rhoindirect") B2=formula(=1/D2)
				`putexcelclose'
			}
		}
		capture noisily estpost sum dur if (pretrans==1 | pretrans == 6)
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("rho") modify
			putexcel A2=("Lignite leavers' average job duration before retirement") C1=("nb spells") D1=("duration") C2=matrix(e(count)) D2=matrix(e(mean))  
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("rho") modify
				putexcel B1=("rho") B2=formula(=1/D2)
				`putexcelclose'
			}
		}
		
		*  (5.1.b) Job loss rate delta
		
		* 1) deltalig 		-> Duration of lignite normal emp => that ends in unemployment, if unemployment ends in any employment later (pretrans 7 and 8)
		* 2) deltanonlig 	-> Duration of non-lignite normal emp => that ends in unemployment, if unemployment ends in any employment later (pretrans 11 and 12)

		di "Lignite leavers' average job duration before unemployment"
		capture noisily estpost sum dur if (pretrans==7 | pretrans == 8)
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("deltalig") modify
			putexcel A2=("Lignite leavers' average job duration in lignite before unemployment") C1=("nb spells") D1=("duration") C2=matrix(e(count)) D2=matrix(e(mean))
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("deltalig") modify
				putexcel B1=("deltalig") B2=formula(=1/D2)
				`putexcelclose'
			}
		}		
		capture noisily estpost sum dur if (pretrans==11 | pretrans == 12)
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("deltanonlig") modify
			putexcel A2=("Lignite leavers' average job duration in NON lignite before unemployment") C1=("nb spells") D1=("duration") C2=matrix(e(count)) D2=matrix(e(mean)) 
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("deltanonlig") modify
				putexcel B1=("deltanonlig") B2=formula(=1/D2)
				`putexcelclose'
			}
		}
		
		*  (5.1.c) Job offer arrival rate / Job finding rate lambda
	
		* 1) lambdalig		-> finding a job in lignite if unemployed (=0 by assumption) after lignite - posttrans = 8 (lignite - unemp/ALMP/marg - job in lignite)
		* 2) lambdanonlig	-> finding a job in non-lignite if unemployed after lignite - posttrans = 7 (lignite - unemp/ALMP/marg - job in non-lignite)
		* 3) lambdazerolig	-> finding a job in non-lignite if unemployed after non-lignite - posttrans = 11 (non-lignite - unemp/ALMP/marg - job in non-lignite)	
		 		 
	
		di "Lignite leavers' average unemployment duration (ending in a new lignite or non-lignite job)"
		capture noisily estpost sum dur if  posttrans==7
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("lambdanonlig") modify
			putexcel A2=("Lignite leavers' average unemployment duration ending in a NON lignite job") C1=("nb spells") D1=("duration") C2=matrix(e(count)) D2=matrix(e(mean)) 
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("lambdanonlig") modify
				putexcel B1=("lambdanonlig") B2=formula(=1/D2)
				`putexcelclose'
			}
		}
		capture noisily estpost sum dur if  posttrans==8
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("lambdalig") modify
			putexcel A2=("Lignite leavers' average unemployment duration ending in a lignite job") C1=("nb spells") D1=("duration") C2=matrix(e(count)) D2=matrix(e(mean)) 
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("lambdalig") modify
				putexcel B1=("lambdalig") B2=formula(=1/D2)
				`putexcelclose'
			}
		}
		
		di "NON Lignite leavers' average unemployment duration (ending in a NON-lignite job)"
		capture noisily estpost sum dur if  posttrans==11
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("lambdazerolig") modify
			putexcel A2=("NON Lignite leavers' average unemployment duration ending in a NON lignite job") C1=("nb spells") D1=("duration") C2=matrix(e(count)) D2=matrix(e(mean)) 
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesample_estimates_NoCensoring, sheet("lambdazerolig") modify
				putexcel B1=("lambdazerolig") B2=formula(=1/D2)
				`putexcelclose'
			}
		}
	}	
		
	
	/* ------------------------------------------------------------------------ */
	 *  (5.2) taking into account right censoring due to black_hole and end of spell 
	/* ------------------------------------------------------------------------ */
	/*ASSUMPTION : constant risk over time	
	CENSORING : waiting time > observation time
	METHOD :	we use the maximum likelihod estimate of the hazard =  [ total number of transitions / total exposure time ]
		
	Identify general right censoring due to no observation of transition
	2 cases
	- right censoring due to last spell
	- right censoring due to black hole
	*/
		
		use ${data}\postcoll.dta, clear	
		
		*Identify last spell
		cap drop last_spell1
		gen last_spell1=0
		by pid (begepi), sort: replace last_spell1 = (_n==_N)
		tab last_spell1		
		tab pretrans last_spell1
		tab posttrans last_spell1
		*caution: transition to retirement are not last spell
		replace last_spell1=0 if (posttrans==1 | posttrans==6 | pretrans==1 | pretrans==6)
		tab last_spell1		
		tab pretrans last_spell1
		tab posttrans last_spell1

		*Identify black holes that come next
		cap drop black_hole_next1
		gen black_hole_next1=0
		by pid (begepi), sort: replace black_hole_next1=1 if (pid[_n+1]==pid & status[_n+1]==10)
		tab black_hole_next1
		tab pretrans black_hole_next1
		tab posttrans black_hole_next1

		cap drop end1
		g end1=1
		replace end1=0 if last_spell1==1 | black_hole_next1==1
		tab end1
		tab pretrans end1	
		tab posttrans end1
		
		*  Exposure time for estimation
		
		* Identify spells after person enter in 103, keeping date of enter for the spells after entering
			cap drop enter103
			gen enter103=.
			bys persnr (first103): replace enter103=first103[1]
			format enter103 %tdDDmonYY
		
		* Identify Employment periods in Lignite 
			cap drop Emp_Lig1
			gen Emp_Lig1=0
			bysort pid (begepi): replace Emp_Lig1=1 if statsimple==1 & thisspelllignite== 1	
			di "check pretrans when employed in lignite"
			tab pretrans Emp_Lig1
		
		* Identify Employment periods NOT in Lignite
			cap drop Emp_NonLig1
			gen Emp_NonLig1=0
			bysort pid (begepi): replace Emp_NonLig1=1 if statsimple==1 & thisspelllignite==0
			di "check pretrans when employed in non lignite"
			tab pretrans Emp_NonLig1		
			
		* Identify Unemployment periods after Employment in Lignite
			cap drop Unemp_postLig1
			gen Unemp_postLig1=0
			bysort pid (begepi): replace Unemp_postLig1 =1 if statsimple==0 & statsimple[_n-1]==1 & thisspelllignite[_n-1]== 1
			di "check pretrans when unemployed after lignite"
			tab pretrans Unemp_postLig1 			
			di "check postrans when unemployed after lignite"
			tab posttrans Unemp_postLig1 

		* Identify Unemployment periods after Employment NOT in Lignite
			cap drop Unemp_postNonLig1
			g Unemp_postNonLig1=0
			bysort pid (begepi): replace Unemp_postNonLig1 = 1 if statsimple==0 & statsimple[_n-1]==1 & thisspelllignite[_n-1]== 0	
			di "check pretrans when unemployed after non lignite"
			tab pretrans Unemp_postNonLig1 			
			di "check postrans when unemployed after non lignite"
			tab posttrans Unemp_postNonLig1 			
		
		* for rho : Time in employment in lignite, including ATZ
			* When ATZ case (person103==1): end of duration of employment is mid103 instead of endepi
			* We then redefine the duration at risk : durretrisk
			cap drop durretrisk1
			gen durretrisk1=dur
			replace durretrisk1=(mid103-begepi)+1 if person103==1 & mid103>begepi & mid103<endepi
			* spells started after the mid-point of early retirement should not contribute to retirement risk
			replace durretrisk1=0 if person103==1 & mid103<begepi
			sum durretrisk1, detail	
			sum durretrisk1 if Emp_Lig1==1
			sum durretrisk1 if Emp_Lig1==1 & (end1==1 & pretrans==1)
			sum durretrisk1 if Emp_Lig1==1 & (end1==1 & pretrans==6)	
			sum durretrisk1 if Emp_Lig1==1 & end1==0		
			
		* for delta and lambda: Time in employment in lignite or non lignite, excluding ATZ			
			cap drop durrisk1
			gen durrisk1=dur
			replace durrisk1=0 if person103==1 & begepi>=enter103				
			replace durrisk1=(enter103-begepi)+1 if person103==1 & enter103>begepi & enter103<endepi			
			
		* for delta: Time in employment in lignite/non lignite excluding ATZ
			di "check durrisk for deltalig - Time in employment in lignite "
			sum durrisk1 if Emp_Lig1==1			
			sum durrisk1 if Emp_Lig1==1 & (end1==1 & pretrans==7) 
			sum durrisk1 if Emp_Lig1==1 & (end1==1 & pretrans==8)
			sum durrisk1 if Emp_Lig1==1 &  end1==0
			
			di "check durrisk for deltanonlig - Time in employment non lignite"
			sum durrisk1 if Emp_NonLig1==1
			sum durrisk1 if Emp_NonLig1==1 & (end1==1 & pretrans==11)
			sum durrisk1 if Emp_NonLig1==1 & (end1==1 & pretrans==12)
			sum durrisk1 if Emp_NonLig1==1 &  end1==0
			
		* for lambda: Time in unemployment after lignite/non lignite excluding ATZ
			di "check durrisk for lambdalig - Time in unemployment after lignite"		
			sum durrisk1 if Unemp_postLig1==1
			sum durrisk1 if Unemp_postLig1==1 & (end1==1 & posttrans==7)
			sum durrisk1 if Unemp_postLig1==1 & (end1==1 & posttrans==8)
			sum durrisk1 if Unemp_postLig1==1 &  end1==0
			
			di "check durrisk for lambdalig - Time in unemployment after non lignite"
			tab posttrans Unemp_postNonLig1 						
			sum durrisk1 if Unemp_postNonLig1==1
			sum durrisk1 if Unemp_postNonLig1==1 & (end1==1 & posttrans==11)
			sum durrisk1 if Unemp_postNonLig1==1 & (end1==1 & posttrans==12)
			sum durrisk1 if Unemp_postNonLig1==1 &  end1==0

	save ${data}\temp5.dta, replace

	* Distribution of short spell
	tab pretrans durretrisk1 if durretrisk1<7
	tab posttrans durretrisk1 if durretrisk1<7

	tab pretrans durrisk1 if durrisk1<7
	tab posttrans durrisk1 if durrisk1<7

	/* Different possibilities of scenarios in the loop to test excluding outliers
		- 0 : use whole sample
		- 0.01 : exclude bottom and top outliers of the duration distribution at percentile 0.01
		- 1 : exclude bottom and top outliers of the duration distribution at percentile 1
		- 2 : exclude bottom and top outliers of the duration distribution at percentile 2
		- 3 : excluding 1 day duration spells
		- 4 : excluding 1 to 6 days duration spells
		- 5 : excluding 1% top duration spells
		- 6 : excluding 2% top duration spells
		- 7 : excluding all observations of people who have at least 1 day spell
	*/
	
	foreach x in 0 /*0.01 0.1 1 2 3 4 5 6 7*/ {
	
	use ${data}\temp5.dta, clear
	
	cap drop keep
	gen keep=1
	
	* Whole sample
	if  `x'==0 {
	di "Analysis on the whole sample"
	}
		
	* Removing spells at top and bottom percentile of distribution	
	if `x'>0 & `x'<3 {
	di "Analysis excluding bottom and top outliers of the duration distribution at the p`x'"	
	
	local top= 100-`x'
	_pctile dur, p(`x' `top')
	return list
	local threshold_bottom = `r(r1)'	
	local threshold_top = `r(r2)'	
	replace keep=0 if dur<`threshold_bottom' | dur>`threshold_top'
	}
	
	* Removing 1 day duration spells
	if `x'==3 {
	di "Analysis excluding 1 day duration spells"
	replace keep=0 if dur==1
	}	
	
	* Removing  1 to 6 day duration spells
	if `x'==4 {
	di "Analysis excluding 1 to 6 day duration spells"
	replace keep=0 if dur<7
	}	
	
	* Removing 1% top distribution spells
	if `x'==5 {
	di "Analysis excluding 1% top distribution spells"
	_pctile dur, p(99)
	return list
	local threshold_top = `r(r1)'	
	replace keep=0 if dur>`threshold_top'
	}
	
	* Removing 2% top distribution spells
	if `x'==6 {
	di "Analysis excluding 2% top distribution spells"
	_pctile dur, p(98)
	return list
	local threshold_top = `r(r1)'	
	replace keep=0 if dur>`threshold_top'
	}
	
	* Removing all observations of people who have at least 1 day spell
	if `x'==7 {
	di "Analysis excluding all observations of people who have at least 1 day spell"
	cap drop shortdur
	gen shortdur=0
	bysort persnr (begepi): replace shortdur=1 if dur==1
	tab shortdur
	*copy values to all observations
	cap drop indivshort
	bysort persnr (begepi): egen indivshort=max(shortdur)
	tab indivshort
	replace keep=0 if indivshort==1
	}
	
	tab keep
	sum dur if keep==0, detail
	tab pretrans keep
	tab posttrans keep
	drop if keep==0	
	sum dur, detail
	sum durretrisk1, detail
	sum durrisk1, detail		
	
		*  (5.2.a)  Retirement probability rho 
		di "Lignite leavers' Retirement probability"
			* Number of distinct persons in cells at risk (working in lignite)
			cap drop dummy infopers countinfopers
			gen dummy=0
			replace dummy=1 if Emp_Lig1==1 & durretrisk1!=0
			bys persnr: egen infopers=max(dummy)
			bys persnr: gen countinfopers=1 if (infopers==1 & _n==1)
			capture noisily estpost sum countinfopers	
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rhodirect") modify
				putexcel A2=("All sample") D1=("nb distinct persons at risk") D2=matrix(e(count))
				`putexcelclose'
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rhoindirect") modify
				putexcel A2=("All sample") D1=("nb distinct persons at risk") D2=matrix(e(count))
				`putexcelclose'
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
				putexcel A2=("All sample") D1=("nb distinct persons at risk") D2=matrix(e(count))
				`putexcelclose'
			}
			* Exposure time : time of employment in lignite (Emp_Lig1==1)
			capture noisily estpost sum durretrisk1 if Emp_Lig1==1
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rhodirect") modify
				putexcel E1=("time at risk") E2=matrix(e(sum))
				`putexcelclose'
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rhoindirect") modify
				putexcel E1=("time at risk") E2=matrix(e(sum))
				`putexcelclose'
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
				putexcel E1=("time at risk") E2=matrix(e(sum))
				`putexcelclose'
			}
			*rhodirect (ex mudirect)
			* Transitions : not right-censored (end==1) and pretrans==1
			capture noisily estpost sum end1 if pretrans==1
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rhodirect") modify
				putexcel C1=("transitions") C2=matrix(e(sum))
				`putexcelclose'
				if ${iab}==1{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rhodirect") modify
					putexcel B1=("rhodirect") B2=formula(=C2/E2)
					`putexcelclose'
				}
			}
			* rhoindirect (ex-mu indirect)
			* Transitions : not right-censored (end==1) and pretrans==6
			capture noisily estpost sum end1 if pretrans==6
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rhoindirect") modify
				putexcel C1=("transitions") C2=matrix(e(sum))
				`putexcelclose'
				if ${iab}==1{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rhoindirect") modify
					putexcel B1=("rhoindirect") B2=formula(=C2/E2)
					`putexcelclose'
				}
			}
			*rho (ex-mu)
			* Transitions : not right-censored (end==1) and pretrans==1 or 6
			capture noisily estpost sum end1 if pretrans==1 | pretrans==6
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
				putexcel C1=("transitions") C2=matrix(e(sum))
				`putexcelclose'
				if ${iab}==1{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
					putexcel B1=("rho") B2=formula(=C2/E2)
					`putexcelclose'
				}
			}

		*  (5.2.b) Job loss rate delta 
		
		di "Lignite leavers' Job loss rate"
			* Number of distinct persons in cells at risk (working in lignite)
			cap drop dummy infopers countinfopers
			gen dummy=0
			replace dummy=1 if Emp_Lig1==1 & durrisk1!=0
			bys persnr: egen infopers=max(dummy)
			bys persnr: gen countinfopers=1 if (infopers==1 & _n==1)
			capture noisily estpost sum countinfopers	
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
				putexcel A2=("All sample") D1=("nb distinct persons at risk") D2=matrix(e(count))
				`putexcelclose'
			}

			*delta lig
			* Exposure time : time of employment in lignite (Emp_Lig==1)
			capture noisily estpost sum durrisk1 if Emp_Lig1==1
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
				putexcel E1=("time at risk") E2=matrix(e(sum))
				`putexcelclose'
			}
			* Transitions : not right-censored (end==1) and pretrans==7 or 8
			capture noisily estpost sum end1 if pretrans==7 | pretrans==8
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
				putexcel C1=("transitions") C2=matrix(e(sum))
				`putexcelclose'
				if ${iab}==1{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
					putexcel B1=("deltalig") B2=formula(=C2/E2)
					`putexcelclose'
				}
			}
			
		di "Non Lignite leavers' Job loss rate"	
			* Number of distinct persons in cells at risk (working in non lignite)
			cap drop dummy infopers countinfopers
			gen dummy=0
			replace dummy=1 if Emp_NonLig1==1 & durrisk1!=0
			bys persnr: egen infopers=max(dummy)
			bys persnr: gen countinfopers=1 if (infopers==1 & _n==1)
			capture noisily estpost sum countinfopers
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
				putexcel A2=("All sample") D1=("nb distinct persons at risk") D2=matrix(e(count))
				`putexcelclose'
			}
		*delta nonlig
			* Exposure time : time of employment NOT in lignite (Emp_NoLig==1) 
			capture noisily estpost sum durrisk1 if Emp_NonLig1==1
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
				putexcel E1=("time at risk") E2=matrix(e(sum))
				`putexcelclose'
			}
			* Transitions : not right-censored (end==1) and pretrans==11 or 12
			capture noisily estpost sum end1 if pretrans==11 | pretrans==12
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
				putexcel C1=("transitions") C2=matrix(e(sum))
				`putexcelclose'
				if ${iab}==1{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
					putexcel B1=("deltanonlig") B2=formula(=C2/E2)
					`putexcelclose'
				}
			
			}
		
		*  (5.2.c) Job offer arrival rate / Job finding rate lambda	
		di "Lignite leavers' Job finding rate (ending in a new lignite or non-lignite job)"
			* Number of distinct persons in cells unemployed after lignite
				* - ending in non lignite or lignite employment (end1==1 & (posttrans==7|posttrans==8))  
				* - OR censored (end1==0)
			cap drop dummy infopers countinfopers
			gen dummy=0
			replace dummy=1 if Unemp_postLig1==1 & durrisk1!=0 & ((end1==1 & (posttrans==7 | posttrans==8)) | end1==0)
			bys persnr: egen infopers=max(dummy)
			bys persnr: gen countinfopers=1 if (infopers==1 & _n==1)
			capture noisily estpost sum countinfopers		
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
				putexcel A2=("All sample") D1=("nb distinct persons at risk") D2=matrix(e(count))
				`putexcelclose'
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
				putexcel A2=("All sample") D1=("nb distinct persons at risk") D2=matrix(e(count))
				`putexcelclose'
			}
	
		* Exposure time : time of unemployment after lignite (Unemp_postLig==1) 
			* - ending in non lignite or lignite employment (end==1 & (posttrans==7|posttrans==8)) 
			* - OR censored (end==0)
			capture noisily estpost sum durrisk1 if Unemp_postLig1==1 & ((end1==1 & (posttrans==7 | posttrans==8)) | end1==0)	
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
				putexcel E1=("time at risk") E2=matrix(e(sum))
				`putexcelclose'
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
				putexcel E1=("time at risk") E2=matrix(e(sum))
				`putexcelclose'
			}
		*lambda nonlig
			* Transitions : not right-censored (end==1) and posttrans==7(NonLig)
			capture noisily estpost sum end1 if posttrans==7
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
				putexcel C1=("transitions") C2=matrix(e(sum))
				`putexcelclose'
				if ${iab}==1{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
					putexcel B1=("lambdanonlig") B2=formula(=C2/E2)
					`putexcelclose'
				}
			}
		*lambda lig
			* Transitions : not right-censored (end==1) and posttrans==8(Lig)
			capture noisily estpost sum end1 if posttrans==8
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
				putexcel C1=("transitions") C2=matrix(e(sum))
				`putexcelclose'
				if ${iab}==1{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
					putexcel B1=("lambdalig") B2=formula(=C2/E2)
					`putexcelclose'
				}
			}

	di "Non Lignite leavers' Job finding rate (ending in a non-lignite job)"
			* Number of distinct persons unemployed after non lignite
			* - ending in Non lignite or lignite employment (end1==1 & (posttrans==11 | posttrans==12) 
			* - OR censored (end1==0)
			cap drop dummy infopers countinfopers
			gen dummy=0
			replace dummy=1 if Unemp_postNonLig1==1 & ((end1==1 & (posttrans==11 | posttrans==12)) | end1==0) & durrisk1!=0
			bys persnr: egen infopers=max(dummy)
			bys persnr: gen countinfopers=1 if (infopers==1 & _n==1)
			capture noisily estpost sum countinfopers
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
				putexcel A2=("All sample") D1=("nb distinct persons at risk") D2=matrix(e(count))
				`putexcelclose'
			}
		* Exposure time : time of unemployment after non lignite (Unemp_postnonLig==1) 
			* - ending in Non lignite or lignite employment (end==1 & (posttrans==11 | posttrans==12) 
			* - OR censored (end==0)
		capture noisily estpost sum durrisk1 if Unemp_postNonLig1==1 & ((end1==1 & (posttrans==11 | posttrans==12)) | end1==0)	
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
			putexcel E1=("time at risk") E2=matrix(e(sum))
			`putexcelclose'
		}
		* Transitions : not right-censored (end==1) and posttrans==11 (NonLig)
		capture noisily estpost sum end1 if posttrans==11
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
			putexcel C1=("transitions") C2=matrix(e(sum))
			`putexcelclose'
				if ${iab}==1{
					putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
					putexcel B1=("lambdazerolig") B2=formula(=C2/E2)
					`putexcelclose'
				}
		}
	}	
		
/* ------------------------------------------------------------------------ */
 *  (6) To create cells : cut spells crossing macro periods
/* ------------------------------------------------------------------------ */			
		* bad macro conditions = 1 / good macro conditions = 2
		* based on approximation to create roughly equally sized sample sizes: 
		* good macro is less than 10% unemployment:
		* 	 	Lausitz:    from (incl) 2015-
		* 		Rheinisches from (incl) 2007 -
		*       Helmstedter from (incl) 2008 - 
		* 		Mitteldeutsches from (incl) 2016 -
		* 		West-Germany: Unemployment below 10% in all years since 1990
		* 		East Germany: Unemployment below 10% for women since 2012, for men since 2015
		
		use ${data}\postcoll.dta, clear

	/* ------------------------------------------------------------- */
		* 6.1 Identify change in macro labour market conditions and cut spells crossing macro periods* 
	/* ------------------------------------------------------------- */	
	tab mining_area, missing
	
	cap drop date_macro
	g date_macro=mdy(01,01,1900)
	format date_macro %tdD_m_CY	
	*Lausitz 2015
	replace date_macro=mdy(01,01,2015) if mining_area==1
	*Mitteldeutsches 2016
	replace date_macro=mdy(01,01,2016) if mining_area==2
	*Helmstedter from 2008
	replace date_macro=mdy(01,01,2008) if mining_area==3	
	*Rheinisches from 2007	
	replace date_macro=mdy(01,01,2007) if mining_area==4	
	*East Germany 2012 for women, 2015 for men
	replace date_macro=mdy(01,01,2012) if (mining_area==. | mining_area==5) & ao_bula>=12 & frau==1
	replace date_macro=mdy(01,01,2015) if (mining_area==. | mining_area==5) & ao_bula>=12 & frau==0
	
	tab date_macro, m

	* JAERE request: Alternative to macro conditions, include only data post 2000
	*                To implement this sample selection, use the macro variable &
	*				 cell-based estimation
					 
	if ${sample}==7{
	replace date_macro=mdy(01,01,2000)
}	
	if (${sample}==8 | ${sample}==9){
	* JAERE: include spells from all time periods 
	*  in analysis cut by occupations & regions.
	replace date_macro=mdy(01,01,1975)
	}
	
	*Identify when there's a change in macro during one spell
	cap drop cross
	gen cross=0
	replace cross=1 if date_macro>begepi & date_macro<endepi
	tab cross

	*Duplicate the spell
	cap drop id
	generate long id = _n
	expand 2 if cross==1
	
	*Store initial end and beggining of spells
	g begepi_init=begepi
	g endepi_init=endepi
	
	g censor=0
	* in first observation
		*Replace endepi by date of change 
		by id, sort: replace endepi = cond(_n == 1 & cross==1, date_macro-1, endepi) 
		*Create indicator for right censoring
		by id, sort: replace censor = cond(_n == 1 & cross==1, 1, censor) 
	* in second observation
		*Replace begepi by date of change	
		by id, sort: replace begepi = cond(_n == 2 & cross==1, date_macro, begepi) 
		*Create indicator for left censoring
		by id, sort: replace censor = cond(_n == 2 & cross==1, 2, censor)	
	
	tab censor

	replace dur = (endepi - begepi) + 1

	* results:
	* 1 - dataset with no spells crossing macro periods
	* 2 - indicator indicating censoring (right=1, left=2, no censoring=0)	
	
	* Identify right censoring due to cutting spells
	* create variable "end_cut" =0 if if right-censored, =1 if spell ending (=failure) is observed.
	g end_cut=1
	replace end_cut=0 if censor==1
	label var end_cut "0 means spell is censored by end of macro conditions"
	tab end_cut
	tab pretrans end_cut
	tab posttrans end_cut
	
	* Identify general right censoring due to no observation of transition
	/* 3 cases
		- right censoring created by cutting spell due to macroeconomic change
		- right censoring due to last spell
		- right censoring due to black hole
	*/

	*Identify last spell
	cap drop last_spell
	gen last_spell=0
	by pid (begepi), sort: replace last_spell = (_n == _N)
	tab pretrans last_spell	
	tab posttrans last_spell
	*caution: transition to retirement are not last spell
	replace last_spell=0 if (posttrans==1 | posttrans==6 | pretrans==1 | pretrans==6)
	tab last_spell		
	tab pretrans last_spell
	tab posttrans last_spell

	* Identify black holes that come next
	cap drop black_hole_next
	gen black_hole_next=0
	by pid (begepi), sort: replace black_hole_next=1 if (pid[_n+1]==pid & status[_n+1]==10)
	tab black_hole_next
	tab pretrans black_hole_next
	tab posttrans black_hole_next

	tab pretrans end_cut
	tab posttrans end_cut
	
	* Here generate variable to indicate that there really is a transition. 
	* end=1 means there is a transition.
	* Alternatives to a transition are 
	* (1) censoring due to macro (end_cut=0); (2) last spell in survey; (3) black hole afterwards.
	* => if none of these three, we have a transition.
	cap drop end

	g end=1
	replace end=0 if (end_cut==0 | last_spell==1 | black_hole_next==1)
	tab end
	tab pretrans end	
	tab posttrans end
	
		*************************************************************** 
		********(6.0.a ) Diagnostics on new spell durations ***************			
		*************************************************************** 
			
			* Distribution of duration of last pre-transition status
			sum dur if inlist(pretrans,1,6,7,8,11,12), detail

			* to fill in (?) in the following lines: status before transition 
			tab pretrans statsimple
			tab pretrans statsimple if thisspelllignite==1
	
			* (a) transition to retirement
			disp("Duration of lignite normalemp/vocational series followed directly or indirectly by retirement")
			sum dur if (pretrans == 6 | pretrans==1), detail
			disp("Duration of lignite normalemp/vocational series directly followed by retirement")
			sum dur if pretrans == 1, detail
			disp("Duration of lignite normalemp/vocational series followed by unemp/margemp/ALMP and then retirement")
			sum dur if pretrans == 6, detail
			
			* (b) transition to unemployment
			disp("Duration of lignite normalemp/vocational series followed by unemployment")
			sum dur if (pretrans==7 | pretrans==8), detail
			disp("Duration of lignite normalemp/vocational series followed by unemployment ending in employment in NON lignite")
			sum dur if (pretrans==7), detail
			disp("Duration of lignite normalemp/vocational series followed by unemployment ending in employment in lignite")
			sum dur if (pretrans==8), detail
	
			disp("Duration of NON-lignite normalemp/vocational series followed by unemployment")
			sum dur if (pretrans==11 | pretrans==12), detail	
			disp("Duration NON-lignite normalemp/vocational series followed by unemployment ending in employment in NON lignite")
			sum dur if (pretrans==11), detail
			disp("Duration of NON-lignite normalemp/vocational series followed by unemployment ending in employment in lignite")
			sum dur if (pretrans==12), detail
			
			* (c) transition from unemployment	
			disp("Duration of Post lignite unemployment series followed by employment")
			sum dur if (posttrans==7 | posttrans==8), detail
			* detailed by sex/year
			tabulate frau if (posttrans==7 | posttrans==8), summarize(dur) 
			tabulate jahrend if (posttrans==7 | posttrans==8), summarize(dur) 
			tabulate jahrend frau if (posttrans==7 | posttrans==8), summarize(dur) 
			
			disp("Duration of Post lignite unemployment series followed by employment in NON lignite")
			sum dur if (posttrans==7), detail
			disp("Duration of Post lignite unemployment series followed by employmentin Lignite")
			sum dur if (posttrans==8), detail
	
			disp("Duration of Post NON-lignite unemployment series followed by employment")
			sum dur if (posttrans==11 | posttrans==12), detail	
			disp("Duration of Post NON-lignite unemployment series followed by employment in NON lignite")
			sum dur if (posttrans==11), detail
			disp("Duration of Post NON-lignite unemployment series followed by employment in Lignite")
			sum dur if (posttrans==12), detail		
			
	/* ------------------------------------------------------------- */
		* 6.2 create variable for macro scenarios * 
	/* ------------------------------------------------------------- */	
		
		g macro=.
		* bad macro conditions = 1 / good macro conditions = 2
		* approximations here
		* (1) good macro is less than 10% unemployment
		* (2) 	Lausitz:    from (incl) 2015-
		* 		Rheinisches from (incl) 2007 -
		*       Helmstedter from (incl) 2008 - 
		* 		Mitteldeutsches from (incl) 2016 -
		replace macro=1 if mining_area== 1 & endepi<mdy(01,01,2015)
		replace macro=1 if mining_area== 2 & endepi<mdy(01,01,2016)
		replace macro=1 if mining_area== 3 & endepi<mdy(01,01,2008)
		replace macro=1 if mining_area== 4 & endepi<mdy(01,01,2007)
		
		replace macro=2 if mining_area== 1 & endepi>=mdy(01,01,2015)
		replace macro=2 if mining_area== 2 & endepi>=mdy(01,01,2016)
		replace macro=2 if mining_area== 3 & endepi>=mdy(01,01,2008)
		replace macro=2 if mining_area== 4 & endepi>=mdy(01,01,2007)

		* West-Germany: Unemployment below 10% in all years since 1990 
		replace macro=2 if (mining_area==. | mining_area==5) & ao_bula<12

		* East Germany: Unemployment below 10% for women since 2012, for men since 2015
		replace macro=1 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi<mdy(01,01,2012) & frau==1
		replace macro=1 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi<mdy(01,01,2014) & frau==0
		replace macro=2 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi>=mdy(01,01,2012) & frau==1
		replace macro=2 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi>=mdy(01,01,2014) & frau==0
		label define macroLAB 1 "hi-unemp" 2 "lo-unemp" 
		label values macro macroLAB

		if ${sample}==7{
		replace macro=.
		replace macro=1 if endepi<mdy(01,01,2000)
		replace macro=2 if endepi>=mdy(01,01,2000)
		}	
		if (${sample}==8 | ${sample}==9){
		replace macro=1
		label define macroLAB 1 "all macro-econ conditions", replace 
		label values macro macroLAB
		}	
		
		tab macro, m

	/* ------------------------------------------------------------- */
		* 6.3 Recalculate age and decades at the end of spell
	/* ------------------------------------------------------------- */	
	tab ageend	
	tab agebeg
	cap drop jahrend
	cap drop jahrbeg
	gen jahrend = year(endepi)
	gen jahrbeg = year(begepi)
	label var jahrend "year at end of spell"
	label var jahrbeg "year at end of spell"
	replace ageend = jahrend - year(geb_dat)
	replace agebeg = jahrbeg - year(geb_dat)
	label var ageend "age at end of spell"
	label var agebeg "age at start of spell"
	di "tab age at end of last spell of sampled workers"
	tab ageend, m 

	cap drop agecat2
	gen agecat2=.
	replace agecat2 = 1 if ageend >= 18 & ageend <= 30
	replace agecat2 = 2 if ageend > 30 & ageend <= 49
	replace agecat2 = 3 if ageend > 49 & ageend !=. 
	tab agecat2, m
	
	* Decade of end of spell (decade 1 are actually more than two decades)
	cap drop decades
	g decades=.
	replace decades=1 if endepi>=mdy(01,01,1970) & endepi<mdy(01,01,1992)

	* note post-92 (incl. East Germany)
	replace decades=2 if endepi>=mdy(01,01,1992) & endepi<mdy(01,01,2000)
	replace decades=3 if endepi>=mdy(01,01,2000) & endepi<mdy(01,01,2010)
	replace decades=4 if endepi>=mdy(01,01,2010) & endepi!=.
	
	tab decades, m	
			
	/* ------------------------------------------------------------------------ */
		*  6.4 Exposure time for estimation
	/* ------------------------------------------------------------------------ */	

		* Identify spells after person enter in 103, keeping date of enter for the spells after entering
			cap drop enter103
			gen enter103=.
			bys persnr (first103): replace enter103=first103[1]
			format enter103 %tdDDmonYY
	
		* Identify Employment periods in Lignite 
			cap drop Emp_Lig
			gen Emp_Lig=0
			bysort pid (begepi): replace Emp_Lig=1 if statsimple==1 & thisspelllignite== 1	
			di "check pretrans when employed in lignite"
			tab pretrans Emp_Lig
		
		* Identify Employment periods NOT in Lignite
			cap drop Emp_NonLig
			gen Emp_NonLig=0
			bysort pid (begepi): replace Emp_NonLig=1 if statsimple==1 & thisspelllignite==0
			di "check pretrans when employed in non lignite"
			tab pretrans Emp_NonLig		
			
		* Identify Unemployment periods after Employment in Lignite
			cap drop Unemp_postLig
			gen Unemp_postLig=0
			bysort pid (begepi): replace Unemp_postLig =1 if statsimple==0 & statsimple[_n-1]==1 & thisspelllignite[_n-1]== 1
			di "check pretrans when unemployed after lignite"
			tab pretrans Unemp_postLig 			
			di "check postrans when unemployed after lignite"
			tab posttrans Unemp_postLig

		* Identify Unemployment periods after Employment NOT in Lignite
			cap drop Unemp_postNonLig
			g Unemp_postNonLig=0
			bysort pid (begepi): replace Unemp_postNonLig = 1 if statsimple==0 & statsimple[_n-1]==1 & thisspelllignite[_n-1]== 0	
			di "check pretrans when unemployed after non lignite"
			tab pretrans Unemp_postNonLig 			
			di "check postrans when unemployed after non lignite"
			tab posttrans Unemp_postNonLig 			
		
		* for rho (ex-mu) : Time in employment in lignite, including ATZ
			* When ATZ case (person103==1): end of duration of employment is mid103 instead of endepi
			* We then redefine the duration at risk : durretrisk

			* create new variable counting start of spell or start of old-age period,
			* because we do not want to diminish retirement risk in old-age by 
			* adding time at risk pre-50 years of age

			g begepiret=begepi
			* only if spell started before 50th birthday 
			* & person is over 50 at end => take 50 as start date of spell.
			label var begepiret "begepi for cell-estimation of retirement probs (based on 50-year-cut-off)"
			replace begepiret =mdy(month(geb_dat),day(geb_dat),year(geb_dat)+50) if (ageend >=50 & agebeg<50)
			*if spell started after 50 years of age, keep original spell start.
			* if spell started before 50 years of age and 
			*	=> take date of 50th birthday as start of spell
			format begepiret %tdDDmonYY

			gen durretrisk=dur
			replace durretrisk=(mid103-begepiret)+1 if (person103==1) & (mid103>begepiret) & (mid103<endepi)
			* zero risk of retirement once you are beyond mid-point of ATZ
			* (you are basically already retired, only legally not.)
			replace durretrisk=0 if (person103==1 & mid103<begepiret)
			sum durretrisk, detail	
			* on average, how much time are lignite workers at risk of retirement
			sum durretrisk if Emp_Lig==1
			* on average, how much time are lignite workers at risk before they directly retire?
			sum durretrisk if Emp_Lig==1 & (end==1 & pretrans==1)
			* on average, how much time are lignite workers at risk before they indirectly retire?
			sum durretrisk if Emp_Lig==1 & (end==1 & pretrans==6)	
			* on average, how much time are lignite workers at risk if they are censored (no transition)
			sum durretrisk if Emp_Lig==1 & end==0		
			
		* for delta and lambda: Time in employment in lignite or non lignite, excluding ATZ			
			gen durrisk=dur
			replace durrisk=0 if person103==1 & begepi>=enter103				
			replace durrisk=(enter103-begepi)+1 if person103==1 & enter103>begepi & enter103<endepi			
			
		* for delta: Time in employment in lignite/non lignite excluding ATZ
			di "check durrisk for deltalig - Time in employment in lignite "
			sum durrisk if Emp_Lig==1			
			sum durrisk if Emp_Lig==1 & (end==1 & pretrans==7) 
			sum durrisk if Emp_Lig==1 & (end==1 & pretrans==8)
			sum durrisk if Emp_Lig==1 &  end==0
			
			di "check durrisk for deltanonlig - Time in employment non lignite"
			sum durrisk if Emp_NonLig==1										
			sum durrisk if Emp_NonLig==1 & (end==1 & pretrans==11)				
			sum durrisk if Emp_NonLig==1 & (end==1 & pretrans==12)				
			sum durrisk if Emp_NonLig==1 &  end==0								
			
		* for lambda: Time in unemployment after lignite/non lignite excluding ATZ
			di "check durrisk for lambdalig - Time in unemployment after lignite"		
			sum durrisk if Unemp_postLig==1
			sum durrisk if Unemp_postLig==1 & (end==1 & posttrans==7)
			sum durrisk if Unemp_postLig==1 & (end==1 & posttrans==8)
			sum durrisk if Unemp_postLig==1 &  end==0
			
			di "check durrisk for lambdalig - Time in unemployment after non lignite"
			tab posttrans Unemp_postNonLig 						
			sum durrisk if Unemp_postNonLig==1
			sum durrisk if Unemp_postNonLig==1 & (end==1 & posttrans==11)
			sum durrisk if Unemp_postNonLig==1 & (end==1 & posttrans==12)
			sum durrisk if Unemp_postNonLig==1 &  end==0
					
/* ------------------------------------------------------------------------ */
 *  (7) Cell sizes
 * To choose cell sizes: How many obs per type & how many transitions ?
/* ------------------------------------------------------------------------ */	
				
/* Sample==2 
 Cell groups (2*2*3*2 = 24 cells)
- gender (2 values)
- education (2 values)
- age category (3 values)
- macro conditions (2 values)
*/	

/* ------------------------------------------------------------------------ */
 * 	CHECK MISSING FOR CHARACTERISTICS DEFINING CELLS
/* ------------------------------------------------------------------------ */
* All cells
tab ageend, m
tab agecat2, m		
tab frau, m
tab educ2, m
tab macro, m

* Only men (cells 1 to 12)
tab ageend if frau==0, m
tab agecat2 if frau==0, m		
tab educ2 if frau==0, m
tab macro if frau==0, m

* Note that observations by same individuals may
* now be in different cells (agecat, macro)
if ${sample}<7{
cap drop cell
	if ${iab}==0{
	egen cell=group(frau educ2 agecat2 macro), label
	}
	if ${iab}==1{
	egen cell=group(frau educ2 agecat2 macro), label(cell, replace)
	}
global cellnumber "24"
}

if ${sample}==7{
* JAERE sample with data post-2000
cap drop cell
	if ${iab}==0{
	egen cell=group(macro), label
	}
	if ${iab}==1{
	egen cell=group(macro), label(cell, replace)
	}
label define cell 1 "pre-2000" 2 "post-2000", replace
label val cell cell
global cellnumber "2"
}

if ${sample}==8{
* JAERE sample for different occupations
cap drop cell
	if ${iab}==0{
	egen cell=group(beruf12), label
	}
	if ${iab}==1{
	egen cell=group(beruf12), label(cell, replace)
	}
	global cellnumber "11"
}
if ${sample}==9{
* JAERE sample for different regions
cap drop cell
gen cell=.
* cells 1-4 = four mining areas (1=Lausitz, 2=Mitteld., 3=Helmstedt, 4=Rheinisch, 5=Other) 
replace cell = mining_area
* cell 5 = West Germany
replace cell = 5 if (mining_area==. | mining_area==5) & ao_bula < 12
* cell 6 = East Germany
replace cell = 6 if (mining_area==. | mining_area==5) & (ao_bula>12 & ao_bula<.)
global cellnumber "6"
label define cell 1 "Lausitzer Revier" 2 "Mitteldt. Revier" 3 "Helmstedter Revier" 4 "Rheinisches Revier" 5 "Other West Ger" 6 "Other East Ger"
label val cell cell
}

			*7a) Number of spells in cells 
if ${sample}==2{
			di "total number of spells in cells: education, age, unemp, gender"
			}
if ${sample}==8{
			di "total number of spells in cells: by occupation (beruf12)"
			}
if ${sample}==9{
			di "total number of spells in cells: by region"
			}
			describe cell
			label list cell
			tab cell, matcell(spellcounts) matrow(matname)
			putexcel set results/${samplefolder}/5_sample${sample}_cells_stats.xlsx, sheet("nb_spells") modify
			putexcel B1=("nb spells") A2= matrix(matname) B2=matrix(spellcounts)
			`putexcelclose'

			*7bi) Number of distinct persons in cells
			cap drop dummy infopers countinfopers
			gen dummy=1 
			bys persnr cell: egen infopers=max(dummy)
			bys persnr cell: gen countinfopers=1 if (infopers==1 & _n==1)
			estpost tabstat countinfopers, by(cell)  statistics(count) columns(statistics)
			putexcel set results/${samplefolder}/5_sample${sample}_cells_stats.xlsx, sheet("nb_distinctpersons") modify
			putexcel B1=("nb distinct persons") A2= matrix(matname) B2=matrix(e(count)')
			`putexcelclose'
			drop dummy infopers countinfopers
			
			*7bii) Number of distinct persons in lignite coal in 2017
			* Method 1
			tab cell if begepi<mdy(6,1,2017) & endepi>mdy(6,1,2017) & thisspelllignite==1 & statsimple==1, matrow(matname)
			capture noisily estpost tab cell if begepi<mdy(6,1,2017) & endepi>mdy(6,1,2017) & thisspelllignite==1 & statsimple==1
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_cells_stats, sheet("nb_distinctpersons_coal2017") modify
				putexcel B1=("nb distinct persons in coal in 2017 (method 1)") A2=matrix(matname) B2=matrix(e(b)')
				`putexcelclose'
			}
			*Method 2
			cap drop dummy infopers countinfopers
			gen dummy=1 if begepi<mdy(6,1,2017) & endepi>mdy(6,1,2017) & thisspelllignite==1 & statsimple==1 
			bys persnr cell: egen infopers=max(dummy)
			bys persnr cell: gen countinfopers=1 if (infopers==1 & _n==1)
			capture noisily estpost tabstat countinfopers, by(cell)  statistics(count) columns(statistics)
			if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_cells_stats.xlsx, sheet("nb_distinctpersons_coal2017") modify
			putexcel C1=("nb distinct persons in coal in 2017 (method 2)") C2=matrix(e(count)')
			`putexcelclose'
			}
			drop dummy infopers countinfopers
			
			*7c) Number of TRANSITIONS in cells 
*			- transition to retirement (pretrans = 1 | pretrans = 6 )
*			- transition to unemployment (pretrans = 7)
*			- transition from unemployment to job (pretrans = 8)				*/

			di "transitions: number of observations: education, gender, age (stata) "
			foreach i in 1 6 7 8 11 12 {
				tab cell if pretrans == `i', matcell(spellcounts_`i') matrow(matname)
				putexcel set results/${samplefolder}/5_sample${sample}_cells_stats.xlsx, sheet("nb_transitions`i'") modify
				putexcel B1=("nb transitions`i'") A2= matrix(matname) B2=matrix(spellcounts_`i')
				`putexcelclose'
			}

/* ------------------------------------------------------------------------ */
 * 	SAVE AFTER CUTTING SPELLS AND CREATING CELLS -> POSTCOLL_CELL.DTA
/* ------------------------------------------------------------------------ */	
save ${data}\postcoll_cell.dta, replace

/* ------------------------------------------------------------------------ */
 * 	BASIC DESCRIPTIVE STATISTICS ON CHARACTERISTICS AFTER CUTTING SPELLS
/* ------------------------------------------------------------------------ */
tab ageend, m
tab agecat2, m		
tab frau, m
tab educ2, m
tab mining_area, m	
tab macro, m
*We want to know if we have black hole spells in the sample (statsimple=.)
tab statsimple,m	

/* ------------------------------------------------------------------------ */
 *  (8) Estimate transition parameters by cell (wage parameters in part 9)
/* ------------------------------------------------------------------------ */			
/*	8.a - rho (ex mu): retirement
	8.b - deltalig, deltanonlig
	8.c - lambdalig, lambdanonlig, lambdazerolig

	ASSUMPTION : constant risk over time	
	CENSORING : waiting time > observation time
	METHOD :	we use the maximum likelihod estimate of the hazard =  [total number of transition / total exposure time]
*/
	
	/* Different possibilities of scenarios in the loop to test excluding outliers
		- 0 : use whole sample
		- 0.01 : exclude bottom and top outliers of the duration distribution at percentile 0.01
		- 1 : exclude bottom and top outliers of the duration distribution at percentile 1
		- 2 : exclude bottom and top outliers of the duration distribution at percentile 2
		- 3 : excluding 1 day duration spells
		- 4 : excluding 1 to 6 days duration spells
		- 5 : excluding 1% top duration spells
		- 6 : excluding 2% top duration spells
		- 7 : excluding all observations of people who have at least 1 day spell
	*/
		
	foreach x in 0 /*0.01 0.1 1 2 3 4 5 6 7*/ {
	
	use ${data}\postcoll_cell.dta, clear
	
	cap drop keep
	gen keep=1

	* Whole sample
	if  `x'==0 {
	di "Analysis on the whole sample"
	local folder="all"
	}
		
	* Removing spells at top and bottom percentile of distribution	
	if `x'>0 & `x'<3 {
	di "Analysis excluding bottom and top outliers of the duration distribution at the p`x'"	
	if `x'==0.01{ 
			local folder="P001"
	}
	if `x'==0.1{ 
	local folder="P01"
	}
	if `x'==1{ 
		local folder="P1"
	}
	if `x'==2{ 
		local folder="P2"
	}
	local top= 100-`x'
	_pctile dur, p(`x' `top')
	return list
	local threshold_bottom = `r(r1)'	
	local threshold_top = `r(r2)'	
	replace keep=0 if dur<`threshold_bottom' | dur>`threshold_top'
	}
	
	* Removing 1 day duration spells
	if `x'==3 {
	di "Analysis excluding 1 day duration spells"
	local folder="Without1day"
	replace keep=0 if dur==1
	}	
	
	* Removing  1 to 6 day duration spells
	if `x'==4 {
	di "Analysis excluding 1 to 6 day duration spells"
	local folder="Without1to6day"
	replace keep=0 if dur<7
	}	
	
	* Removing 1% top distribution spells
	if `x'==5 {
	di "Analysis excluding 1% top distribution spells"
	local folder="WithoutP1Top"	
	_pctile dur, p(99)
	return list
	local threshold_top = `r(r1)'	
	replace keep=0 if dur>`threshold_top'
	}
	
	* Removing 2% top distribution spells
	if `x'==6 {
	di "Analysis excluding 2% top distribution spells"
	local folder="WithoutP2Top"		
	_pctile dur, p(98)
	return list
	local threshold_top = `r(r1)'	
	replace keep=0 if dur>`threshold_top'
	}
	
	* Removing all observations of people who have at least 1 day spell
	if `x'==7 {
	di "Analysis excluding all observations of people who have at least 1 day spell"
	local folder="WithoutPersnr1day"		
	cap drop shortdur
	gen shortdur=0
	bysort persnr (begepi): replace shortdur=1 if dur==1
	tab shortdur
	*copy values to all observations
	cap drop indivshort
	bysort persnr (begepi): egen indivshort=max(shortdur)
	tab indivshort
	replace keep=0 if indivshort==1
	}
	
	tab keep
	sum dur if keep==0, detail
	tab pretrans keep
	tab posttrans keep	
	drop if keep==0	
	sum dur, detail
	sum durrisk, detail
	
	cap mkdir results/${samplefolder}/`folder'
	
	/* -------------------------------------------- */
	 *  (8a)  Retirement probability rho (ex-mu)
	/* -------------------------------------------- */
	* 	We estimate three different rhos (ex-mu's:)
	*  	1) do not distinguish between those two types of retirement
	*	2) rho (ex-mu) direct retirement probability  (pretrans == 1)	-> lignite -> retirement
	*	3) rho (ex-mu) indirect retirement probability (pretrans == 6) 	-> lignite -> unemp/marg emp/ALMP -> retirement

	* by cells
	di "Lignite leavers' Retirement probability (according to cell)"

	* Number of distinct persons at risk			
	cap drop dummy infopers countinfopers
	gen dummy=0
	replace dummy=1 if Emp_Lig==1 & durretrisk!=0
	bys persnr cell: egen infopers=max(dummy)
	bys persnr cell: gen countinfopers=1 if (infopers==1 & _n==1)
			
	forvalues i = 1/$cellnumber {
		local j=`i'+2
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
		putexcel A`j'=("`i'")
		`putexcelclose'
		* Number of distinct persons at risk
		di "distinct cell `i'"
		capture noisily estpost sum countinfopers if cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
			putexcel D1=("nb distinct persons at risk") D`j'=matrix(e(count))
			`putexcelclose'
		}
		
		* Exposure time : time of employment in lignite (Emp_Lig1==1)
		di "durretrisk cell `i'"
		capture noisily estpost sum durretrisk if Emp_Lig==1 & cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
			putexcel E1=("time at risk") E`j'=matrix(e(sum))
			`putexcelclose'
		}
		
		*rho (ex-mu)
		* Transitions : not right-censored (end==1) and pretrans==1 or 6
		di "transition cell `i'"
		capture noisily estpost sum end if (pretrans==1 | pretrans==6) & cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
			putexcel C1=("transitions") C`j'=matrix(e(sum))
			`putexcelclose'	
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("rho") modify
				putexcel B1=("rho") B`j'=formula(=C`j'/E`j')
				`putexcelclose'
			}
		}
 }


*Age distribution in lignite in 2017 by cells
cap drop presentcoal2017 agecoal2017
gen presentcoal2017=0
replace presentcoal2017=1 if thisspelllignite==1 & begepi<mdy(6,1,2017) & endepi>mdy(6,1,2017) 
gen agecoal2017=2017-year(geb_dat) if presentcoal2017==1 

* we create a sheet with age in row and cell in columns
* we thus want to assign cell number 1 in column B, 
*  cell number 2 in column C, etc.
* for this we attribute a number for each letter in the alphabet: 
* we use `c(ALPHA)' which returns a string containing "A B C ... Z" 
* and tokenize which divides string into tokens
tokenize "`c(ALPHA)'"
* we create a local variable k =2 which is incremented by 1 
* at each loop on cells, 
* and use ``k'' to return the letter corresponding to position k in alphabet 
local k=2
forvalues i = 1/$cellnumber {
	forvalues a = 18/65{
		local r=`a'-16
		capture noisily count if agecoal2017==`a' & cell==`i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_age, sheet("distrib_age_coal2017") modify
			putexcel A`r'=("`a'") ``k''1=("cell`i'") ``k''`r'=(r(N))
			`putexcelclose'
		}
	}
	local ++k
}
			
**************************************
*** rho by age 50-65, whole sample ***
**************************************
	forvalues j = 50/65 {
		local r=`j'-48
		* rho now not by cells, but by specific ages 
		* Exposure time : time of employment in lignite (Emp_Lig1==1)
		di "durretrisk age `j'"
		capture noisily estpost sum durretrisk if Emp_Lig==1 & ageend == `j'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_age, sheet("rho_wholesample") modify
			putexcel A`r'=("`j'") D1=("time at risk") D`r'=matrix(e(sum))
			`putexcelclose'
		}
 
		* Transitions : not right-censored (end==1) and pretrans==1 or 6
		di "transition age `i'"
		capture noisily estpost sum end if (pretrans==1 | pretrans==6) & ageend == `j'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_age, sheet("rho_wholesample") modify
			putexcel C1=("transitions") C`r'=matrix(e(sum))
			`putexcelclose'	
			if ${iab}==1{
			putexcel set results/${samplefolder}/5_age, sheet("rho_wholesample") modify
			putexcel B1=("rho") B`r'=formula(=C`r'/D`r')
			`putexcelclose'
			}
		}
	}
	
*******************************************************
*** rho by age 50-65, by cell (if cells incl. ages) ***
*******************************************************

if ${sample}<7{
* START: 
* skip this part if cells include no age-specific categories:
* in benchmark, we have age-specific categories, in JAERE-requested
* samples 7, 8, and 9, we have no age-specific categories

cap drop cellbis cellret
egen cellbis=group(frau educ2 macro), label
tab cellbis, m
gen cellret=.
replace cellret=5 if cellbis==1
replace cellret=6 if cellbis==2
replace cellret=11 if cellbis==3
replace cellret=12 if cellbis==4
replace cellret=17 if cellbis==5
replace cellret=18 if cellbis==6
replace cellret=23 if cellbis==7
replace cellret=24 if cellbis==8
tab cellret, m
global cellretnumber "5 6 11 12 17 18 23 24"

foreach c of global cellretnumber {
	forvalues j = 50/65 {
		local r=`j'-48
		di "durretrisk cell`c' age `j'"		
		* Exposure time : time of employment in lignite (Emp_Lig==1)
		di "durretrisk cell`c' age `j'"
		capture noisily estpost sum durretrisk if Emp_Lig==1 & ageend == `j' & cellret==`c'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_age, sheet("rho_cell`c'") modify
			putexcel A`r'=("`j'") D1=("time at risk") D`r'=matrix(e(sum))
			`putexcelclose'
		}
 
		* Transitions : not right-censored (end==1) and pretrans==1 or 6
		di "transition cell`c' age `j'"
		capture noisily estpost sum end if (pretrans==1 | pretrans==6) & ageend == `j' & cellret==`c'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_age, sheet("rho_cell`c'") modify
			putexcel C1=("transitions") C`r'=matrix(e(sum))
			`putexcelclose'	
			if ${iab}==1{
			putexcel set results/${samplefolder}/5_age, sheet("rho_cell`c'") modify
			putexcel B1=("rho") B`r'=formula(=C`r'/D`r')
			`putexcelclose'
			}
		}
	}
}
}

	/* -------------------------------------------- */
	 *  (8b) Job loss rate delta 
	/* -------------------------------------------- */	
	
	* 1) deltalig 		-> Duration of lignite normal emp => that ends in unemployment, if unemployment ends in any employment later (pretrans 7 and 8)
	* 2) deltanonlig 	-> Duration of non-lignite normal emp => that ends in unemployment, if unemployment ends in any employment later (pretrans 11 and 12)
	
	* by cells
		di "Job loss rate (by cell - unemployment after lignite only)"
		* Number of distinct persons in cells at risk (working in lignite)
		cap drop dummy infopers countinfopers
		gen dummy=0
		replace dummy=1 if Emp_Lig==1 & durrisk!=0
		bys persnr cell: egen infopers=max(dummy)
		bys persnr cell: gen countinfopers=1 if (infopers==1 & _n==1)
		
		forvalues i = 1/$cellnumber {
		local j=`i'+2
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
		putexcel A`j'=("`i'")
		`putexcelclose'
		* Number of distinct persons in cells at risk (working in lignite)
		di "distinct cell `i'"
		capture noisily estpost sum countinfopers if cell == `i'
		if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
		putexcel D1=("nb distinct persons at risk") D`j'=matrix(e(count))
		`putexcelclose'
		}
		* Exposure time : time of employment in lignite (Emp_Lig1==1)
		di "durrisk cell `i'"
		capture noisily estpost sum durrisk if Emp_Lig==1 & cell == `i'
		if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
		putexcel E1=("time at risk") E`j'=matrix(e(sum))
		`putexcelclose'
		}
		*deltalig
		* Transitions : not right-censored (end==1) and pretrans==7 or 8
		di "transition cell `i'"
		capture noisily estpost sum end if (pretrans==7 | pretrans==8) & cell == `i'
		if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
		putexcel C1=("transitions") C`j'=matrix(e(sum))
		`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltalig") modify
				putexcel B1=("deltalig") B`j'=formula(=C`j'/E`j')
				`putexcelclose'
			}
		}
	}
			
	di "Job loss rate (by cell -unemployment after nonlignite only)"
	* Number of distinct persons in cells at risk (working in non lignite)
	cap drop dummy infopers countinfopers
	gen dummy=0
	replace dummy=1 if Emp_NonLig==1 & durrisk!=0
	bys persnr cell: egen infopers=max(dummy)
	bys persnr cell: gen countinfopers=1 if (infopers==1 & _n==1)
	
	forvalues i = 1/$cellnumber {
		local j=`i'+2
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
		putexcel A`j'=("`i'")
		`putexcelclose'
		* Number of distinct persons in cells at risk (working in non lignite)
		di "distinct cell `i'"
		capture noisily estpost sum countinfopers if cell == `i'
		if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
		putexcel D1=("nb distinct persons at risk") D`j'=matrix(e(count))
		`putexcelclose'
		}
		* Exposure time : time of employment NOT in lignite (Emp_NoLig==1) 
		di "durrisk cell `i'"
		capture noisily estpost sum durrisk if Emp_NonLig==1 & cell == `i'
		if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
		putexcel E1=("time at risk") E`j'=matrix(e(sum))
		`putexcelclose'
		}
		*deltanonlig
		* Transitions : not right-censored (end==1) and pretrans==11 or 12
		di "transition cell `i'"
		capture noisily estpost sum end if (pretrans==11 | pretrans==12) & cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
			putexcel C1=("transitions") C`j'=matrix(e(sum))
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("deltanonlig") modify
				putexcel B1=("deltanonlig") B`j'=formula(=C`j'/E`j')
				`putexcelclose'
			}
		}
	}
		
	/* -------------------------------------------------- */
	 *  (8c) Job offer arrival rate / Job finding rate lambda
	/* -------------------------------------------------- */	

		* 1) lambdalig		-> finding a job in lignite if unemployed (=0 by assumption) after lignite - posttrans = 8 (lignite - unemp/ALMP/marg - job in lignite)
		* 2) lambdanonlig	-> finding a job in non-lignite if unemployed after lignite - posttrans = 7 (lignite - unemp/ALMP/marg - job in non-lignite)
		* 3) lambdazerolig	-> finding a job in non-lignite if unemployed after non-lignite - posttrans = 11 (non-lignite - unemp/ALMP/marg - job in non-lignite)	
		
		* we can define the estimator as the total number of transitions divided by the total exposure time(=time in unemployment after lignite or after non lignite) 		 
	
	* by cells

	di "Lignite leavers' Job finding rate (ending in a new lignite job)"
	* Number of distinct persons in cells unemployed after lignite
			* - ending in non lignite or lignite employment (end==1 & (posttrans==7|posttrans==8))  
			* - OR censored (end==0)
	cap drop dummy infopers countinfopers
	gen dummy=0
	replace dummy=1 if Unemp_postLig==1 & ((end==1 & (posttrans==7 | posttrans==8)) | end==0) & durrisk!=0
	bys persnr cell: egen infopers=max(dummy)
	bys persnr cell: gen countinfopers=1 if (infopers==1 & _n==1)
		
	forvalues i = 1/$cellnumber {
		local j=`i'+2
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
		putexcel A`j'=("`i'")
		`putexcelclose'
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
		putexcel A`j'=("`i'")
		`putexcelclose'
		* Number of distinct persons in cells unemployed after lignite
				* - ending in non lignite or lignite employment (end==1 & (posttrans==7|posttrans==8))  
				* - OR censored (end==0)
		di "distinct cell `i'"
		capture noisily estpost sum countinfopers if cell == `i'
		if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
		putexcel D1=("nb distinct persons at risk") D`j'=matrix(e(count))
		`putexcelclose'
		*same for lambdanonlig
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
		putexcel D1=("nb distinct persons at risk") D`j'=matrix(e(count))
		`putexcelclose'
		}
		* Exposure time : time of unemployment after lignite (Unemp_postLig==1) 
				* - ending in non lignite or lignite employment (end==1 & (posttrans==7|posttrans==8))  
				* - OR censored (end==0)
		di "durrisk cell `i'"
		capture noisily estpost sum durrisk if Unemp_postLig==1 & ((end==1 & (posttrans==7 | posttrans==8)) | end==0) & cell == `i'
		if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
		putexcel E1=("time at risk") E`j'=matrix(e(sum))
		*same for lambdanonlig
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
		putexcel E1=("time at risk") E`j'=matrix(e(sum))
		`putexcelclose'
		}
		*lambdalig
		* Transitions : not right-censored (end==1) and posttrans==8 (Lig)
		di "transition cell `i'"
		capture noisily estpost sum end if posttrans==8 & cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
			putexcel C1=("transitions") C`j'=matrix(e(sum))
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdalig") modify
				putexcel B1=("lambdalig") B`j'=formula(=C`j'/E`j')
				`putexcelclose'
			}
		}
	}
			
	di "Lignite leavers' Job finding rate (ending in a new non-lignite job)"
	
	* Number of distinct persons in cells:  same as lambdalig, cf above
	
	forvalues i = 1/$cellnumber {
		local j=`i'+2
		* Exposure time : same as lambdalig, cf above
		*lambdalig
		* Transitions : not right-censored (end==1) and posttrans==7 (NonLig)
		di "transition cell `i'"
		capture noisily estpost sum end if posttrans==7 & cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
			putexcel C1=("transitions") C`j'=matrix(e(count))
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdanonlig") modify
				putexcel B1=("lambdanonlig") B`j'=formula(=C`j'/E`j')
				`putexcelclose'
			}
		}
	}

	di "Non Lignite leavers' Job finding rate (ending in a new non-lignite job)"
	* Number of distinct persons in cells unemployed after non lignite
		* - ending in Non lignite or lignite employment (end==1 & (posttrans==11 | posttrans==12) 
		* - OR censored (end==0)
	cap drop dummy infopers countinfopers
	gen dummy=0
	replace dummy=1 if Unemp_postNonLig==1 & ((end==1 & (posttrans==11 | posttrans==12)) | end==0) & durrisk!=0
	bys persnr cell: egen infopers=max(dummy)
	bys persnr cell: gen countinfopers=1 if (infopers==1 & _n==1)
	
	forvalues i = 1/$cellnumber {
		local j=`i'+2
		putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
		putexcel A`j'=("`i'")
		`putexcelclose'
		* Number of distinct persons in cells unemployed after non lignite
		* - ending in Non lignite or lignite employment (end==1 & (posttrans==11 | posttrans==12) 
		* - OR censored (end==0)
		di "distinct cell `i'"
		capture noisily estpost sum countinfopers if cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
			putexcel D1=("nb distinct persons at risk") D`j'=matrix(e(count))
			`putexcelclose'
		}
		* Exposure time : time of unemployment after non lignite (Unemp_postnonLig==1) 
				* - ending in Non lignite or lignite employment (end==1 & (posttrans==11 | posttrans==12) 
				* - OR censored (end==0)
		di "durrisk cell `i'"
		capture noisily estpost sum durrisk if Unemp_postNonLig==1 & ((end==1 & (posttrans==11 | posttrans==12)) | end==0) & cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
			putexcel E1=("time at risk") E`j'=matrix(e(sum))
			`putexcelclose'
		}
		*lambdazerolig
		* Transitions : not right-censored (end==1) and posttrans==11
		di "transition cell `i'"
		capture noisily estpost sum end if posttrans==11 & cell == `i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
			putexcel C1=("transitions") C`j'=matrix(e(sum))
			`putexcelclose'
			if ${iab}==1{
				putexcel set results/${samplefolder}/5_sample${sample}_wholesampleAndcells_estimates_Censoring, sheet("lambdazerolig") modify
				putexcel B1=("lambdazerolig") B`j'=formula(=C`j'/E`j')
				`putexcelclose'
			}
		}
	}

}		
clear all

/* ------------------------------------------------------------------------ */
 *  (9) Wage offer distribution
/* ------------------------------------------------------------------------ */

	/* After transitioning out of lignite, the model assumes that individuals have 
	a period of unemployment before finding a new job. 

   We are interested in two wages:
 	- wc (variable wcoal) is the wage that was paid in coal (= wage before transition to unemployment)
	- w' (variable woffer) is the wage paid in next job ( = wage after unemployment)

* (9.0) Reload data before collating spells (USE PRECOLL.DTA) 
		* KEEP only spells with info on wage at the beginning or at the end of the spell
		* Define CELLS again, with different point in time for macro condition depending on spell in lignite or not (MACRO_WAGE)
-> SAVE WAGE_SAMPLE.DTA
* (9.1) average wc by cell (wcoal) - characterize distrbution (normal and lognormal)
* (9.2) average w' by cell (woffer) - characterize distrbution (normal and lognormal)

* (9.3) We then assume w' is a function of wc and estimate for each cell
* a simple linear relationship using different specifications: 
* - reg w' wc i.year
* - reg w' wc i.decades
* - reg w' wc  year
* - reg w' wc decades

* (9.4) We are then able to do back of the envelope calculations for the expected 
* value of continued employment, using the values estimated for rho (ex-mu), delta‚ lambda
* and some additional assumptions:
* - b = 0.5 wc
* - deltaB = 0
*
* (9.5) Elements for JAERE revision
* (9.5.1) Evolution of coal versus non-coal wages over time
* (9.5.2) Distribution of first age of working in data
* (9.5.3) wage growth profiles of workers in different age ranges - by age ranges
*/

*********************************************************************************

* Reload data before collating spells and define cells again
* we need to reload the data since we have removed first of consecutive employment spells at start of (6)
use ${data}\precoll.dta, clear

*(9.0) Prepare
tab statsimple if tentg_end==. & tentg_beg==.
tab statsimple if tentg_end==. & tentg_beg!=.
tab statsimple if tentg_end!=. & tentg_beg==. 


* KEEP only spells with info on wage at the beginning or at the end of the spell
keep if (tentg_end!=. | tentg_beg!=.)

* We create cells again

* For macro condition we consider different point in time
* For spells in lignite: we are interested in the wage at the end of the spell, so we allocate spells in cell depending on macro situation at the end of the spell
* For spells not in lignite : we are interested in the wage at the beginning of the spell, so we allocate spells in cell depenending on macro situation at the beginning of the spell* Definition of macro scenario

		* bad macro conditions = 1 / good macro conditions = 2
		* approximations here
		* (1) good macro is less than 10% unemployment
		* (2) 	Lausitz:    from (incl) 2015-
		* 		Rheinisches from (incl) 2007 -
		*       Helmstedter from (incl) 2008 - 
		* 		Mitteldeutsches from (incl) 2016 -
		
	* Macro condition at end of spell
		cap drop macro_end
		g macro_end=.

		replace macro_end=1 if mining_area== 1 & endepi<mdy(01,01,2015)
		replace macro_end=1 if mining_area== 2 & endepi<mdy(01,01,2016)
		replace macro_end=1 if mining_area== 3 & endepi<mdy(01,01,2008)
		replace macro_end=1 if mining_area== 4 & endepi<mdy(01,01,2007)
		
		replace macro_end=2 if mining_area== 1 & endepi>=mdy(01,01,2015)
		replace macro_end=2 if mining_area== 2 & endepi>=mdy(01,01,2016)
		replace macro_end=2 if mining_area== 3 & endepi>=mdy(01,01,2008)
		replace macro_end=2 if mining_area== 4 & endepi>=mdy(01,01,2007)

		* West-Germany: Unemployment below 10% in all years since 1990 
		replace macro_end=2 if (mining_area==. | mining_area==5) & ao_bula<12

		* East Germany: Unemployment below 10% for women since 2012, for men since 2015 
		replace macro_end=1 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi<mdy(01,01,2012) & frau==1
		replace macro_end=1 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi<mdy(01,01,2014) & frau==0
		replace macro_end=2 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi>=mdy(01,01,2012) & frau==1
		replace macro_end=2 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi>=mdy(01,01,2014) & frau==0

		if ${sample}==7{
		replace macro_end=1 if endepi<mdy(01,01,2000)
		replace macro_end=2 if endepi>=mdy(01,01,2000)
		}
		
		tab macro_end, m
		
		cap drop macro_beg
		g macro_beg=.

		replace macro_beg=1 if mining_area== 1 & begepi<mdy(01,01,2015)
		replace macro_beg=1 if mining_area== 2 & begepi<mdy(01,01,2016)
		replace macro_beg=1 if mining_area== 3 & begepi<mdy(01,01,2008)
		replace macro_beg=1 if mining_area== 4 & begepi<mdy(01,01,2007)
		
		replace macro_beg=2 if mining_area== 1 & begepi>=mdy(01,01,2015)
		replace macro_beg=2 if mining_area== 2 & begepi>=mdy(01,01,2016)
		replace macro_beg=2 if mining_area== 3 & begepi>=mdy(01,01,2008)
		replace macro_beg=2 if mining_area== 4 & begepi>=mdy(01,01,2007)

		* West-Germany: Unemployment below 10% in all years since 1990 
		replace macro_beg=2 if (mining_area==. | mining_area==5) & ao_bula<12

		* East Germany: Unemployment below 10% for women since 2012, for men since 2015 
		replace macro_beg=1 if (mining_area==. | mining_area==5) & ao_bula>=12 & begepi<mdy(01,01,2012) & frau==1
		replace macro_beg=1 if (mining_area==. | mining_area==5) & ao_bula>=12 & begepi<mdy(01,01,2014) & frau==0
		replace macro_beg=2 if (mining_area==. | mining_area==5) & ao_bula>=12 & begepi>=mdy(01,01,2012) & frau==1
		replace macro_beg=2 if (mining_area==. | mining_area==5) & ao_bula>=12 & begepi>=mdy(01,01,2014) & frau==0		
		
		if ${sample}==7{
		replace macro_beg=1 if endepi<mdy(01,01,2000)
		replace macro_beg=2 if endepi>=mdy(01,01,2000)
		}
		
	* Macro condition at beginning of spell
		tab macro_beg, m
		label define macroLAB 1 "hi-unemp" 2 "lo-unemp"
		
		label values macro_end macroLAB
		label values macro_beg macroLAB
		
	* Macro condition kept to allocate spell in cells
		gen macro_wage=.
		replace macro_wage= macro_end if thisspelllignite==1 // we consider macro ondition at end of spell for lignite spells
		replace macro_wage= macro_beg if thisspelllignite==0 // we consider macro conditions at beginning of spell for non lignite spells
		label values macro_wage macroLAB

		tab macro_wage, m

	/* Cell groups (2*2*3*2 = 24 cells) unless ${sample}==7, then only 2 cells
	- gender (2 values)
	- education (2 values)
	- age category (3 values)
	- macro conditions (2 values)

	 NB not: sector (3 values); mining area (2 values); tenure; occupations
	*/	
	
/* ------------------------------------------------------------------------ */
 * 	CHECK MISSING FOR CHARACTERISTICS DEFINING CELLS
/* ------------------------------------------------------------------------ */
* All cells
tab ageend, m
tab agecat2, m		
tab frau, m
tab educ2, m
tab macro_wage, m	

* Only men (cells 1 to 12)
tab ageend if frau==0, m
tab agecat2 if frau==0, m		
tab educ2 if frau==0, m
tab macro_wage if frau==0, m

* AERE 26th November 2022	
cap drop cell
if ${sample}<7{
egen cell=group(frau educ2 agecat2 macro_wage), label	
global cellnumber "24"	
}
if ${sample}==7{
egen cell=group(macro_wage), label
global cellnumber "2"
}
if ${sample}==8{
* JAERE sample for different occupations
cap drop cell
egen cell=group(beruf12), label
global cellnumber "11"
}
if ${sample}==9{
* JAERE sample for different regions
cap drop cell
gen cell=.
* cells 1-4 = four mining areas (1=Lausitz, 2=Mitteld., 3=Helmstedt, 4=Rheinisch, 5=Other) 
replace cell = mining_area
* cell 5 = West Germany
replace cell = 5 if (mining_area==. | mining_area==5) & ao_bula < 12
* cell 6 = East Germany
replace cell = 6 if (mining_area==. | mining_area==5) & (ao_bula>12 & ao_bula<.)
global cellnumber "6"
}

/* ------------------------------------------------------------------------ */
 * 	SAVE AFTER CREATING CELLS -> WAGE_SAMPLE.DTA
/* ------------------------------------------------------------------------ */	

if ${sample}<7{
save ${data}\wage_sample.dta, replace
}
* To avoid writing over wage sample and potentially using 
* the wrong wage sample in 7biocoal, save using diff names.
if ${sample}==7{
save ${data}\wage_sample7.dta, replace
}
if ${sample}==8{
save ${data}\wage_sample8.dta, replace
}
if ${sample}==9{
save ${data}\wage_sample9.dta, replace
}
* We have multiple measures of wages - the ones we work with in 
* article are: for COAL WAGES [wcoal_end_monthly]
*              for NON-COAL WAGES [wnc_offer_tentg_beg]

di "Monthly wage by year"
g wage_monthly = tentgelt*tentgeltdays

tab jahrend if wage_monthly!=., matrow(matname) // get years where we have observations
matrix list matname
local dim = rowsof(matname)
display `dim'
	
global year
	
forvalues i=1/`dim' {
	global year $year matname[`i',1]
}
display $year
	
foreach i of global year {
	local j = `i'-1973
	capture noisily estpost sum tentgelt if jahrend==`i'
	if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("daily_wage_byyear") modify
		putexcel A`j'=(`i') B1=("wage_daily") B`j'=matrix(e(mean))
		`putexcelclose'
	}
	capture noisily estpost sum wage_monthly if jahrend==`i'
	if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("monthly_wage_byyear") modify
		putexcel A`j'=(`i') B1=("wage_monthly") B`j'=matrix(e(mean))
		`putexcelclose'
	}
}


*(9.1) Average wage in lignite before transition to unemployment - Characterize distribution
* Use pretrans = 7 which indicates the lignite spell in (lignite - unemp - other normalemp not in lignite)
di "Number of spells of employment in lignite followed by unemployement and then employment not in lignite"
count if thisspelllignite == 1 & pretrans == 7

	* generate variable that contains lignite income pre-transition
	cap drop wcoal_suminc_R wcoal_tentg_end wcoal_monthly wcoal_end_monthly
	
	gen wcoal_suminc_R = .
	replace wcoal_suminc_R = suminc_R  if thisspelllignite == 1 & pretrans == 7 //  add additional restrictions ? 
	
	gen wcoal_tentg_end = .
	replace wcoal_tentg_end = tentg_end if thisspelllignite == 1 & pretrans == 7 //  add additional restrictions ? 

	gen wcoal_end_monthly = .
	replace wcoal_end_monthly = tentg_end*tentgeltdays if thisspelllignite == 1 & pretrans == 7 //  add additional restrictions ? 
	label var wcoal_end_monthly "monthly wage in coal"

	gen wctentend_preimp = .
	replace wctentend_preimp = tentend_preimp if thisspelllignite == 1 & pretrans == 7 //  add additional restrictions ? 
	label var wctentend_preimp "daily last wage in coal pre-top-code-impute"

	
	* full sample - distribution analysis
	di "Distribution of incomes in lignite before transition to unemployment with suminc_R"
	sum wcoal_suminc_R, d 	
	
	di "Distribution of wage in lignite before transition to unemployment with tentg_end daily"
	estpost sum wcoal_tentg_end, d

	di "Distribution of wage in lignite before transition to unemployment with tentg_end monthly"
	*monthly wage by year
	tab jahrend if wcoal_end_monthly!=., matrow(matname) // get years where we have observations
	matrix list matname
	local dim = rowsof(matname)
	display `dim'
	
	global year
	forvalues i=1/`dim' {
		global year $year matname[`i',1]
	}
	display $year
	
	foreach i of global year {
		local j = `i'-1973
		capture noisily estpost sum wcoal_tentg_end if jahrend==`i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("daily_wage_byyear") modify
			putexcel A`j'=(`i') C1=("wcoal_end_daily") C`j'=matrix(e(mean))
			`putexcelclose'
		}
		capture noisily estpost sum wcoal_end_monthly if jahrend==`i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("monthly_wage_byyear") modify
			putexcel A`j'=(`i') C1=("wcoal_end_monthly") C`j'=matrix(e(mean))
			`putexcelclose'
		}
	}
	
	*full sample - normal distribution paramaters
	capture noisily estpost sum wcoal_end_monthly, d
	if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("wcoal_normal") modify
		#delimit ; 
		putexcel A2=("wcoal_end_monthly_all") B1=("mean") B2=matrix(e(mean)) C1=("sd") C2=matrix(e(sd)) D1=("sum_w") D2=matrix(e(sum_w)) E1=("skewness") E2=matrix(e(skewness)) F1=("kurtosis") F2=matrix(e(kurtosis)) G1=("sum") G2=matrix(e(sum)) H1=("min") H2=matrix(e(min)) I1=("max") I2=matrix(e(max)) J1=("p1") J2=matrix(e(p1)) K1=("p5") K2=matrix(e(p5)) L1=("p10") L2=matrix(e(p10)) M1=("p25") M2=matrix(e(p25)) N1=("p50") N2=matrix(e(p50)) O1=("p75") O2=matrix(e(p75)) P1=("p90") P2=matrix(e(p90)) Q1=("p95") Q2=matrix(e(p95)) R1=("p99") R2=matrix(e(p99)) ;
		#delimit cr
		`putexcelclose'
	}	
	
	*full sample - normal distribution fit
	dpplot wcoal_end_monthly if wcoal_end_monthly<=$incmax, caption("") mlw(medthin) ms(oh) scheme(economist)
	graph export results/${samplefolder}/5_sample${sample}_wcoal_dpplot.pdf, replace
	kdensity wcoal_end_monthly, normal 
	graph export results/${samplefolder}/5_sample${sample}_wcoal_kdensity.pdf, replace
	qnorm wcoal_end_monthly
	graph export results/${samplefolder}/5_sample${sample}_wcoal_qnorm.pdf, replace
	sktest wcoal_end_monthly

	*full sample - lognormal distribution parameters
	lognfit wcoal_end_monthly, stats
	preserve
	keep if e(sample)==1
	if ${sample}<7{
		save ${data}\wage_sample_wcoal_estimation.dta, replace
	}
	if ${sample}==7{
		save ${data}\wage_sample7_wcoal_estimation.dta, replace
	}
	if ${sample}==8{
		save ${data}\wage_sample8_wcoal_estimation.dta, replace
	}
	if ${sample}==9{
		save ${data}\wage_sample9_wcoal_estimation.dta, replace
	}

	tab pretrans
	restore
	putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("wcoal_lognormal") modify
	putexcel A2=("wcoal_end_monthly_all") B1=("mu") B2=(e(bm)) C1=("sigma") C2=(e(bv)) 
	`putexcelclose'
	
	*full sample - lognormal distribution fit
	dpplot wcoal_end_monthly if wcoal_end_monthly<=$incmax, dist(lognormal) param(`e(bm)' `e(bv)') caption("") mlw(medthin) ms(oh) scheme(economist) 
	graph export results/${samplefolder}/5_sample${sample}_wcoal_dpplot_logn.pdf, replace

	_pctile wcoal_end_monthly, nq(1000)	
	return list

	_pctile wcoal_end_monthly if wcoal_end_monthly<=$incmax, nq(1000)	
	return list
	
	* full sample: contrast pre-vs-post-imputation coal & non-coal wages
	sum wctentend_preimp wcoal_end_monthly, d
	
	* by cells - distribution analysis 
	forvalues i = 1/$cellnumber {
		local j = `i'+2
		di "Distribution of wage in lignite before transition to unemployment for cell `i'"	
			*by cells - normal distribution paramaters
			capture noisily estpost sum wcoal_end_monthly if cell == `i', d 
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("wcoal_normal") modify
				#delimit ; 
				putexcel A`j'=("wcoal_end_monthly_`i'") B`j'=matrix(e(mean)) C`j'=matrix(e(sd)) D`j'=matrix(e(sum_w)) E`j'=matrix(e(skewness)) 
				F`j'=matrix(e(kurtosis)) G`j'=matrix(e(sum)) H`j'=matrix(e(min)) I`j'=matrix(e(max)) J`j'=matrix(e(p1)) K`j'=matrix(e(p5)) 
				L`j'=matrix(e(p10)) M`j'=matrix(e(p25)) N`j'=matrix(e(p50)) O`j'=matrix(e(p75)) P`j'=matrix(e(p90)) Q`j'=matrix(e(p95)) R`j'=matrix(e(p99));
				#delimit cr
			`	putexcelclose'
			}
			*by cells - normal distribution fit
			capture noisily dpplot wcoal_end_monthly  if cell == `i' & wcoal_end_monthly<=$incmax, caption("")
			if _rc!=2000 & _rc!=2001 {
				graph export results/${samplefolder}/5_sample${sample}_wcoal_dpplot_`i'.pdf, replace
			}
			capture noisily kdensity wcoal_end_monthly if cell == `i', normal
			if _rc!=2000 & _rc!=2001 & _rc!=198{
				graph export results/${samplefolder}/5_sample${sample}_wcoal_kdensity_`i'.pdf, replace
			}
			capture noisily qnorm wcoal_end_monthly if cell == `i'
			if _rc!=2000 & _rc!=2001{
			graph export results/${samplefolder}/5_sample${sample}_wcoal_qnorm_`i'.pdf, replace
			}
			capture noisily sktest wcoal_end_monthly if cell == `i'
			
			*by cells - lognormal distribution parameters
			capture noisily lognfit wcoal_end_monthly if cell == `i', stats
			if _rc!=2000 & _rc!=2001{	
				putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("wcoal_lognormal") modify
				putexcel A`j'=("wcoal_end_monthly_`i'") B`j'=(e(bm)) C`j'=(e(bv)) 
				`putexcelclose'
			}
			
			*by cells - lognormal distribution fit
			capture noisily dpplot wcoal_end_monthly if cell == `i' & wcoal_end_monthly<=$incmax, dist(lognormal) param(`e(bm)' `e(bv)') caption("") 
			if _rc!=2000 & _rc!=2001{
				graph export results/${samplefolder}/5_sample${sample}_wcoal_dpplot_logn_`i'.pdf, replace
			}
	}

		
*(9.2) Average offer wage after unemployment (in non-lignite only) - Characterize distribution
di "Number of spells after unemployment (in non-lignite only)"
count if posttrans[_n-1] == 7 & pid == pid[_n-1]

	* generate w' (woffer) for each suitable spell
	* full sample
	sort pid begepi
	cap drop wnc_offer_tentg_beg wnc_offer_suminc_R wnc_offer_beg_monthly

	gen wnc_offer_suminc_R = .
	sort pid begepi
	replace wnc_offer_suminc_R = suminc_R if posttrans[_n-1] == 7 & pid == pid[_n-1]

	gen wnc_offer_tentg_beg = .
	sort pid begepi
	replace wnc_offer_tentg_beg = tentg_beg if posttrans[_n-1] == 7 & pid == pid[_n-1]
	
	gen wnc_offer_beg_monthly = .
	sort pid begepi
	replace wnc_offer_beg_monthly = tentg_beg*tentgeltdays if posttrans[_n-1] == 7 & pid == pid[_n-1]
	label var wnc_offer_beg_monthly "monthly starting wage in non-coal"

	* create same wnc variable (non-coal starting wages after unemp) for pre-imputed wages:
	gen wnctentbeg_preimp = .
	sort pid begepi
	replace wnctentbeg_preimp = tentbeg_preimp if posttrans[_n-1] == 7 & pid == pid[_n-1]
	label var wnctentbeg_preimp "monthly start wage in non-coal pre-top-code-impute"	

	* compare wnc pre- and post-imputation
	sum wnctentbeg_preimp wnc_offer_beg_monthly, d
	
	* full sample
	di "Distribution of wage offer no coal (w') after unemployment (new job in non-lignite) with suminc_R"
	sum wnc_offer_suminc_R, d	
	
	di "Distribution of wage offer no coal (w') after unemployment (new job in non-lignite) with tentg_beg daily"
	sum wnc_offer_tentg_beg, d
	
	di "Distribution of wage offer no coal (w') after unemployment (new job in non-lignite) with tentg_beg monthly"
	*monthly wage by year
	tab jahrend if wnc_offer_beg_monthly!=., matrow(matname) // get years where we have observations
	matrix list matname
	local dim = rowsof(matname)
	display `dim'
	
	global year
	
	forvalues i=1/`dim' {
		global year $year matname[`i',1]
	}
	display $year
	
	foreach i of global year {
		local j = `i'-1973
		capture noisily estpost sum wnc_offer_tentg_beg if jahrend==`i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("daily_wage_byyear") modify
			putexcel A`j'=(`i') D1=("wnc_offer_daily") D`j'=matrix(e(mean))
			`putexcelclose'
		}
		capture noisily estpost sum wnc_offer_beg_monthly if jahrend==`i'
		if _rc!=2000{
			putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("monthly_wage_byyear") modify
			putexcel A`j'=(`i') D1=("wnc_offer_monthly") D`j'=matrix(e(mean))
			`putexcelclose'
		}
	}
	
	*full sample - normal distribution paramaters
	capture noisily estpost sum wnc_offer_beg_monthly, d
	if _rc!=2000{
		putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("wnc_offer_normal") modify
		#delimit ; 
		putexcel A2=("wnc_offer_beg_monthly_all") B1=("mean") B2=matrix(e(mean)) C1=("sd") C2=matrix(e(sd)) D1=("sum_w") D2=matrix(e(sum_w)) E1=("skewness") E2=matrix(e(skewness)) 
		F1=("kurtosis") F2=matrix(e(kurtosis)) G1=("sum") G2=matrix(e(sum)) H1=("min") H2=matrix(e(min)) I1=("max") I2=matrix(e(max)) J1=("p1") J2=matrix(e(p1)) 
		K1=("p5") K2=matrix(e(p5)) L1=("p10") L2=matrix(e(p10)) M1=("p25") M2=matrix(e(p25)) N1=("p50") N2=matrix(e(p50)) O1=("p75") O2=matrix(e(p75)) P1=("p90") P2=matrix(e(p90)) 
		Q1=("p95") Q2=matrix(e(p95)) R1=("p99") R2=matrix(e(p99)); 
		#delimit cr
		`putexcelclose'
	}
	_pctile wnc_offer_beg_monthly, nq(1000)	
	return list
	
	_pctile wnc_offer_beg_monthly if wnc_offer_beg_monthly<=$incmax, nq(1000)	
	return list

	*full sample - normal distribution fit
	dpplot wnc_offer_beg_monthly if wnc_offer_beg_monthly<=$incmax, caption("") mlw(medthin) ms(oh) scheme(economist)
	graph export results/${samplefolder}/5_sample${sample}_wnc_offer_dpplot.pdf, replace
	kdensity wnc_offer_beg_monthly, normal
	graph export results/${samplefolder}/5_sample${sample}_wnc_offer_kdensity.pdf, replace
	qnorm wnc_offer_beg_monthly 
	graph export results/${samplefolder}/5_sample${sample}_wnc_offer_qnorm.pdf, replace
	sktest wnc_offer_beg_monthly

	*full sample - lognormal distribution parameters
	lognfit wnc_offer_beg_monthly, stats
	preserve
	keep if e(sample)==1
	if ${sample}<7{
		save ${data}\wage_sample_wnc_estimation.dta, replace
	}
	if ${sample}==7{
		save ${data}\wage_sample7_wnc_estimation.dta, replace
	}
	if ${sample}==8{
		save ${data}\wage_sample8_wnc_estimation.dta, replace
	}
	if ${sample}==9{
		save ${data}\wage_sample9_wnc_estimation.dta, replace
	}

	tab posttrans
	restore
	
	putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("wnc_offer_lognormal") modify
	putexcel A2=("wnc_offer_beg_monthly_all") B1=("mu") B2=(e(bm)) C1=("sigma") C2=(e(bv)) 
	`putexcelclose'
	*full sample - lognormal distribution fit
	dpplot wnc_offer_beg_monthly if wnc_offer_beg_monthly<=$incmax, dist(lognormal) param(`e(bm)' `e(bv)') caption("") mlw(medthin) ms(oh) scheme(economist)
	graph export results/${samplefolder}/5_sample${sample}_wnc_offer_dpplot_logn.pdf, replace
	
	* by cells
	forvalues i = 1/$cellnumber {
	di "Distribution of wage offer after unemployment for cell `i'"	
			local j = `i'+2
			*by cells - normal distribution paramaters
			capture noisily estpost sum wnc_offer_beg_monthly if cell == `i' , d 
			if _rc!=2000{
				putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("wnc_offer_normal") modify
				#delimit ; 
				putexcel A`j'=("wnc_offer_beg_monthly_`i'") B`j'=matrix(e(mean)) C`j'=matrix(e(sd)) D`j'=matrix(e(sum_w)) E`j'=matrix(e(skewness)) 
				F`j'=matrix(e(kurtosis)) G`j'=matrix(e(sum)) H`j'=matrix(e(min)) I`j'=matrix(e(max)) J`j'=matrix(e(p1)) K`j'=matrix(e(p5)) 
				L`j'=matrix(e(p10)) M`j'=matrix(e(p25)) N`j'=matrix(e(p50)) O`j'=matrix(e(p75)) P`j'=matrix(e(p90)) Q`j'=matrix(e(p95)) R`j'=matrix(e(p99)) ;
				#delimit cr
				`putexcelclose'
			}
			*by cells - normal distribution fit
			capture noisily dpplot wnc_offer_beg_monthly if cell == `i' & wnc_offer_beg_monthly<=$incmax, caption("")
			if _rc!=2000 & _rc!=2001 {
				graph export results/${samplefolder}/5_sample${sample}_wnc_offer_dpplot_`i'.pdf, replace
			}
			capture noisily kdensity wnc_offer_beg_monthly if cell == `i', normal
			if _rc!=2000 & _rc!=2001 & _rc!=198{
				graph export results/${samplefolder}/5_sample${sample}_wnc_offer_kdensity_`i'.pdf, replace
			}
			capture noisily qnorm wnc_offer_beg_monthly if cell == `i'
			if _rc!=2000 & _rc!=2001 {
				graph export results/${samplefolder}/5_sample${sample}_wnc_offer_qnorm_`i'.pdf, replace
			}
			capture noisily sktest wnc_offer_beg_monthly if cell == `i'
			*by cells - lognormal distribution parameters
			capture noisily lognfit wnc_offer_beg_monthly if cell == `i', stats
			if _rc!=2000 & _rc!=2001 {
				putexcel set results/${samplefolder}/5_sample${sample}_wages_distribution, sheet("wnc_offer_lognormal") modify
				putexcel A`j'=("wnc_offer_beg_monthly_`i'") B`j'=(e(bm)) C`j'=(e(bv))  
				`putexcelclose'
			}
			*by cells - lognormal distribution fit
			capture noisily dpplot wnc_offer_beg_monthly if cell == `i' & wnc_offer_beg_monthly<=$incmax, dist(lognormal) param(`e(bm)' `e(bv)') caption("") scheme(economist)
			if _rc!=2000 & _rc!=2001 {
			graph export results/${samplefolder}/5_sample${sample}_wnc_offer_dpplot_logn_`i'.pdf, replace
			}
	}

	
* (9.3) We then assume w' is a function of wc and estimate for each cell
* a simple linear relationship using different specifications: 
* - reg w' wc  i.year
* - reg w' wc  i.decades
* - reg w' wc year
* - reg w* wc decades

* Nota bene: A spell can inform us on wcoal and woffer, if it is both a post-unemp
*			and a pre-unemp spell. NB. even then, tentg_end and tentg_beg may differ
*

di "Number of spells that inform us on both last wcoal and first woffer"
count if wcoal_end_monthly!=. & wnc_offer_beg_monthly!=.

sort pid begepi
replace wnc_offer_beg_monthly = wnc_offer_beg_monthly[_n+2] if pretrans == 7 & pid == pid[_n+2]

* full sample
capture noisily reg wnc_offer_beg_monthly wcoal_end_monthly  i.jahrend, robust
if _rc!=2000 & _rc!=2001 & ${iab}==1{
	matrix output1=r(table)
	mat reg_ijahrend=output1'
	matrix list reg_ijahrend
	putexcel set  results/${samplefolder}/5_sample${sample}_wnc_offer_reg_ijahrend, replace
	putexcel A1=matrix(reg_ijahrend), names
	`putexcelclose'
}

capture noisily reg wnc_offer_beg_monthly wcoal_end_monthly  jahrend, robust
if _rc!=2000 & _rc!=2001 & ${iab}==1{
	matrix output2=r(table)
	mat reg_jahrend=output2'
	matrix list reg_jahrend
	putexcel set  results/${samplefolder}/5_sample${sample}_wnc_offer_reg_jahrend, replace
	putexcel A1=matrix(reg_jahrend), names
	`putexcelclose'
}

capture noisily reg wnc_offer_beg_monthly wcoal_end_monthly  i.decades, robust
if _rc!=2000 & _rc!=2001 & ${iab}==1{
	matrix output3=r(table)
	mat reg_idecades=output3'
	matrix list reg_idecades
	putexcel set  results/${samplefolder}/5_sample${sample}_wnc_offer_reg_idecades, replace
	putexcel A1=matrix(reg_idecades), names
	`putexcelclose'
}

capture noisily reg wnc_offer_beg_monthly wcoal_end_monthly  decades, robust
if _rc!=2000 & _rc!=2001 & ${iab}==1{
	matrix output4=r(table)
	mat reg_decades=output4'
	matrix list reg_decades
	putexcel set  results/${samplefolder}/5_sample${sample}_wnc_offer_reg_decades, replace
	putexcel A1=matrix(reg_decades), names
	`putexcelclose'
}

forvalues i = 1/$cellnumber {
	capture noisily reg wnc_offer_beg_monthly wcoal_end_monthly  i.jahrend 	if cell == `i', robust
	if _rc!=2000 & _rc!=2001 & ${iab}==1{
		matrix output1_`i'=r(table)
		mat reg_ijahrend_`i'=output1_`i''
		matrix list reg_ijahrend_`i'
		putexcel set  results/${samplefolder}/5_sample${sample}_wnc_offer_reg_ijahrend_`i', replace
		putexcel A1=matrix(reg_ijahrend_`i'), names
		`putexcelclose'
	}
	capture noisily reg wnc_offer_beg_monthly wcoal_end_monthly  jahrend 		if cell == `i', robust
	if _rc!=2000 & _rc!=2001 & ${iab}==1{
		matrix output2_`i'=r(table)
		mat reg_jahrend_`i'=output2_`i''
		matrix list reg_jahrend_`i'
		putexcel set  results/${samplefolder}/5_sample${sample}_wnc_offer_reg_jahrend_`i', replace
		putexcel A1=matrix(reg_jahrend_`i'), names
		`putexcelclose'
	}
	capture noisily reg wnc_offer_beg_monthly wcoal_end_monthly  i.decades if cell == `i', robust
	if _rc!=2000 & _rc!=2001 & ${iab}==1{
		matrix output3_`i'=r(table)
		mat reg_idecades_`i'=output3_`i''
		matrix list reg_idecades_`i'
		putexcel set  results/${samplefolder}/5_sample${sample}_wnc_offer_reg_idecades_`i', replace
		putexcel A1=matrix(reg_idecades_`i'), names
		`putexcelclose'
	}
	capture noisily reg wnc_offer_beg_monthly wcoal_end_monthly  decades	if cell == `i', robust
	if _rc!=2000 & _rc!=2001 & ${iab}==1{
		matrix output4_`i'=r(table)
		mat reg_decades_`i'=output4_`i''
		matrix list reg_decades_`i'
		putexcel set  results/${samplefolder}/5_sample${sample}_wnc_offer_reg_decades_`i', replace
		putexcel A1=matrix(reg_decades_`i'), names
		`putexcelclose'
		}
} 

* (9.4) We are then able to do back of the envelope calculations for the expected 
* value of continued employment, using the values estimated for rho (ex-mu), delta‚ lambda
* and some additional assumptions:
* - b = 0.5 wc
* - deltaB = 0
*
**************************************************************************************
*** (9.5) Elements for JAERE revision											   ***
*** (9.5.1) Evolution of coal versus non-coal wages over time					   ***
*** (9.5.2) Distribution of first age of working in data						   ***
*** (9.5.3) wage growth profiles of workers in diff. age ranges - by age ranges    ***
**************************************************************************************

* NB. elements for JAERE revision also in section on transition 
*     outcomes (ca. sections 2.1-2.4)

*****************************************************************
*** (9.5.1) Evolution of coal versus non-coal wages over time ***
*****************************************************************

* starting wages in non-coal / * last wages in coal by year
forvalues i = 1975/$maxyear{
g wc_`i'=.
label var wc_`i' "last wage in coal if in year `i'"
replace wc_`i'= wcoal_end_monthly if jahrend==`i' & wcoal_end_monthly <= $incmax

g wnc_`i'=.
label var wnc_`i' "first wage in non-coal if in year `i'"
replace wnc_`i'= wnc_offer_beg_monthly if jahrbeg==`i' & wnc_offer_beg_monthly <= $incmax
count if jahrend==`i' & wnc_offer_beg_monthly > $incmax & wnc_offer_beg_monthly!=.
* LH4.8.2022: how many observations removed due to our own criteria 

g wc_ca5_`i'=.
replace wc_ca5_`i'= wcoal_end_monthly if (jahrend>=`i' - 5 & jahrend<`i' + 5) 
g wc_ca10_`i'=.
replace wc_ca10_`i'= wcoal_end_monthly if (jahrend>=`i' - 10 & jahrend<`i' + 10) 
g wc_ca15_`i'=.
replace wc_ca15_`i'= wcoal_end_monthly if (jahrend>=`i' - 15 & jahrend<`i' + 15) 

g wnc_ca5_`i'=.
replace wnc_ca5_`i'= wnc_offer_beg_monthly if (jahrbeg>=`i' - 5 & jahrbeg<`i' + 5)
g wnc_ca10_`i'=.
replace wnc_ca10_`i'= wnc_offer_beg_monthly if (jahrbeg>=`i' - 10 & jahrbeg<`i' + 10)
g wnc_ca15_`i'=.
replace wnc_ca15_`i'= wnc_offer_beg_monthly if (jahrbeg>=`i' - 15 & jahrbeg<`i' + 15)

* yearly averages & standard deviation coal
egen wc_mu_`i'=mean(wc_`i')
label var wc_mu_`i' "mean last monthly wages in coal in `i'"
egen wc_sd_`i'=sd(wc_`i')
label var wc_sd_`i' "st dev of last monthly wages in coal in year `i'"

* yearly averages & standard deviation non-coal
egen wnc_sd_`i'=sd(wnc_`i')
label var wnc_sd_`i' "st dev of last monthly wages in ncoal in year `i'"
egen wnc_mu_`i'=mean(wnc_`i')
label var wnc_mu_`i' "mean offered monthly wages in non-coal in `i'"


* moving averages: mean and standard deviation for 5 / 10 / 15 years
egen wc_mav5_`i'=mean(wc_ca5_`i')
label var wc_mav5_`i' "mean last wages in coal in `i' with 5 year-mov-average"
egen wc_sd5_`i'=sd(wc_ca5_`i')
label var wc_sd5_`i' "s.d. last wages in coal in `i' with 5 year-mov-average"
egen wnc_mav5_`i'=mean(wnc_ca5_`i')
label var wnc_mav5_`i' "mean offered wages in non-coal in `i' with 5 year-mov-average"
egen wnc_sd5_`i'=sd(wnc_ca5_`i')
label var wnc_sd5_`i' "s.d. offered wages in non-coal in `i' with 5 year-mov-average"

egen wc_mav10_`i'=mean(wc_ca10_`i')
label var wc_mav10_`i' "mean last wages in coal in `i' with 10 year-mov-average"
egen wc_sd10_`i'=sd(wc_ca10_`i')
label var wc_sd10_`i' "s.d. last wages in coal in `i' with 10 year-mov-average"
egen wnc_mav10_`i'=mean(wnc_ca10_`i')
label var wnc_mav10_`i' "mean offered wages in non-coal in `i' with 10 year-mov-average"
egen wnc_sd10_`i'=sd(wnc_ca10_`i')
label var wnc_sd10_`i' "s.d. offered wages in non-coal in `i' with 10 year-mov-average"

egen wc_mav15_`i'=mean(wc_ca15_`i')
label var wc_mav5_`i' "mean last wages in coal in `i' with 15 year-mov-average"
egen wc_sd15_`i'=sd(wc_ca15_`i')
label var wc_sd15_`i' "sd last wages in coal in `i' with 15 year-mov-average"
egen wnc_mav15_`i'=mean(wnc_ca15_`i')
label var wc_mav15_`i' "mean offered wages in non-coal in `i' with 15 year-mov-average"
egen wnc_sd15_`i'=sd(wnc_ca15_`i')
label var wnc_sd15_`i' "sd offered wages in non-coal in `i' with 15 year-mov-average"
}

forvalues i = 1975/$maxyear{
g wdiff_mu_`i' = wc_mu_`i'[1] - wnc_mu_`i'[1]
g wdiff_mav5_`i' = wc_mav5_`i'[1]-wnc_mav5_`i'[1]
g wdiff_mav10_`i' = wc_mav10_`i'[1]-wnc_mav10_`i'[1]
g wdiff_mav15_`i' = wc_mav15_`i'[1]-wnc_mav15_`i'[1]
}
su wdiff_mu*
su wdiff_mav5*
su wdiff_mav10*
su wdiff_mav15*

************************************************************
*** (9.5.2) Distribution of first age of working in data ***
************************************************************
g agebeglig_P75=.
replace agebeglig_P75=agebeg if jahrbeg>1975 & thisspelllignite==1
replace agebeglig_P75=. if (jahrbeg<=1975 | jahrbeg==.| thisspelllignite==0)
* given that we are trying to identify the age of recrutement, 
* we exclude spells that already started pre-1975
* by setting them to zero and then removing them - across all obs per person
bys persnr: egen agestart=min(agebeglig_P75) 

* only use one observation per person for statistics on age distribution
bys persnr: replace agestart=. if _n>1

su agestart, detail
tab agestart 

tab agestart if decades==3
tab agestart if decades==4

*******************************************************************************************
*** *** (9.5.3) wage growth profiles of workers in different age ranges - by age ranges ***
*******************************************************************************************
* the relevant variables are created in 1prepare,
* since consecutive spells are collated there

su tentgrowlig

********************************************
*** Differences in wage growth by decade ***
********************************************

su tentgrowlig

su tentgrowlig if decades==1

su tentgrowlig if decades==2

su tentgrowlig if decades==3

su tentgrowlig if decades==4


***********************************************
*** Differences in wage growth by age range ***
***********************************************

su tentgrowlig_twen 
	*"wage growth in lignite if aged up to 30"
su tentgrowlig_thir 
	*"wage growth in lignite if aged 30-40"
su tentgrowlig_four 
	*"wage growth in lignite if aged 40-50"
su tentgrowlig_five 
	*"wage growth in lignite if aged 50-60"
su tentgrowlig_sixp 
	*"wage growth in lignite if aged 60plus"

* spells ending post 2010 only: *
su tentgrowlig_twen if decades==4
	*"wage growth in lignite if aged up to 30"
su tentgrowlig_thir if decades==4
	*"wage growth in lignite if aged 30-40"
su tentgrowlig_four if decades==4
	*"wage growth in lignite if aged 40-50"
su tentgrowlig_five if decades==4
	*"wage growth in lignite if aged 50-60"
su tentgrowlig_sixp if decades==4
	*"wage growth in lignite if aged 60plus"

	
	
cap log close

clear all

